Skip to content

Commit

Permalink
Initial commit. Pattern quasi-quoting broken (due to haskell-src-exts…
Browse files Browse the repository at this point in the history
… not

supporting view patterns).  Committing in order to save the commented out
sections
  • Loading branch information
Michael Sloan committed Jun 29, 2011
0 parents commit 956c4ab
Show file tree
Hide file tree
Showing 6 changed files with 408 additions and 0 deletions.
27 changes: 27 additions & 0 deletions LICENSE
@@ -0,0 +1,27 @@
Copyright (c) Michael Sloan 2011

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the author nor the names of other contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
17 changes: 17 additions & 0 deletions README
@@ -0,0 +1,17 @@
http://hackage.haskell.org/package/typed-regex

Provides a quasi-quoter for regular expressions which
yields a tuple, of appropriate arity and types,
representing the results of the captures. Allows the user
to specify parsers for captures as inline Haskell. Can
also be used to provide typeful pattern matching in
function definitions and case patterns.

To build / install:

chmod ugo+x Setup.hs
./Setup.hs configure --user
./Setup.hs build
./Setup.hs install

See the haddock or Text/Regex/PCRE/QQT.hs for documentation.
3 changes: 3 additions & 0 deletions Setup.hs
@@ -0,0 +1,3 @@
#!/usr/bin/env runhaskell
import Distribution.Simple
main = defaultMain
303 changes: 303 additions & 0 deletions Text/Regex/PCRE/QQT.hs
@@ -0,0 +1,303 @@
{-# LANGUAGE TemplateHaskell, QuasiQuotes, TupleSections, ParallelListComp, ViewPatterns #-}

-----------------------------------------------------------------------------
-- |
-- Module : Text.Regex.PCRE.QQT
-- Copyright : Michael Sloan 2011
--
-- Maintainer : Michael Sloan (mgsloan@gmail.com)
-- Portability : unportable
-- Stability : experimental
--
-- This module provides a template Haskell quasiquoter for regular
-- expressions, which provides the following features:
--
-- 1) Compile-time checking that the regular expression is valid.
--
-- 2) Arity of resulting tuple based on the number of capture patterns in
-- the regular expression.
--
-- 3) By default utilizes type inference to determine how to parse capture
-- patterns - uses "maybeRead" function, which yields a "(Read a) => Just a"
-- value on successful parse.
--
-- 4) Allows for the inline interpolation of a mapping function String -> a.
--
-- Inspired by / copy-modified from Matt Morrow's regexqq package:
-- http://hackage.haskell.org/packages/archive/regexqq/latest/doc/html/src/Text-Regex-PCRE-QQ.html
-- And code from Erik Charlebois's interpolatedstring-qq package:
-- http://hackage.haskell.org/packages/archive/interpolatedstring-qq/latest/doc/html/Text-InterpolatedString-QQ.html

module Text.Regex.PCRE.QQT (reg, makeExpr, maybeRead, pack, unpack) where

import Text.Regex.PCRE.Light (Regex,PCREOption,PCREExecOption)
import qualified Text.Regex.PCRE.Light as PCRE

--import Control.Monad (liftM)

import qualified Data.ByteString as B
import Data.ByteString.Internal (c2w,w2c)
import Data.Either.Utils (forceEitherMsg)
import Data.List (groupBy, inits, sortBy, isPrefixOf)
--import Data.List.Split (splitOn)
import Data.Maybe (catMaybes, listToMaybe)
import Data.Ord (comparing)
import Data.Char (isSpace)

-- import Debug.Trace

import Language.Haskell.TH
import Language.Haskell.TH.Quote
import Language.Haskell.Meta.Parse

{-
import Language.Haskell.Exts.Fixity
import Language.Haskell.Exts.Extension
import Language.Haskell.Exts.Parser hiding (parseExp, parseType, parsePat)
import Language.Haskell.Meta.Syntax.Translate
-}

type ParseChunk = Either String (Int, String)
type ParseChunks = (Int, String, [(Int, String)])

{- TODO: idea - provide a variant which allows splicing in an expression
that evaluates to regex string. The tricky thing here is that this
splice might contain capture groups - thus, the parser here would need to
be referenced in order to dispatch
-}

reg :: QuasiQuoter
reg = QuasiQuoter
(checkRegex makeExpr . parseIt)
-- (checkRegex makePat . parseIt)
undefined
undefined undefined

-- Gives an error at compile time if the regex string is invalid
checkRegex :: (ParseChunks -> a) -> ParseChunks -> a
checkRegex f x@(_, pat, _) =
forceEitherMsg ("Error compiling regular expression [$reg|" ++ pat ++ "|]")
(PCRE.compileM (pack pat) pcreOpts) `seq` f x

regex :: String -> Regex
regex = flip PCRE.compile pcreOpts . pack

maybeRead :: (Read a) => String -> Maybe a
maybeRead = fmap fst . listToMaybe . reads

-- Template Haskell Code Generation
-------------------------------------------------------------------------------

-- If no processing function, default to "maybeRead".
makeExpr :: ParseChunks -> ExpQ
makeExpr (cnt, pat, exs) = buildExpr cnt pat
. map (processExpr . snd . head)
. groupSortBy (comparing fst)
$ exs ++ ((0, "") : [(i, "maybeRead") | i <- [1..cnt]])
where processExpr "" = Nothing
processExpr xs = Just . forceEitherMsg ("Error while parsing capture mapper " ++ xs)
$ parseExp xs

{-
makePat :: ParseChunks -> PatQ
makePat (cnt, pat, exs) = do
viewExp <- buildExpr cnt pat $ map (liftM fst) ys
return . ViewP viewExp . TupP . map snd $ catMaybes ys
where
ys = map (liftM post . processPat . snd . head)
. groupSortBy (comparing fst)
$ exs ++ [(i, "") | i <- [0..cnt]]
post (ViewP e x) = (e, x)
post x = (VarE (mkName "maybeRead"), x)
processPat "" = Nothing
processPat xs | "->" `isPrefixOf` trimFront xs
= forceParse $ "(maybeRead " ++ xs ++ ")"
processPat xs = forceParse xs
forceParse xs = Just . forceEitherMsg ("Error while parsing capture pattern " ++ xs)
$ parsePat xs
-}

buildExpr :: Int -> String -> [Maybe Exp] -> ExpQ
buildExpr cnt pat hexps = do
vx <- newName "x"
emptyE <- [|""|]
caseExp <- [| fmap (map unpack)
$ PCRE.match (regex pat) (pack $(return $ VarE vx)) pcreExecOpts |]
let mkMatch rcnt xs = Match
(if null xs then ConP (mkName "Nothing") []
else ConP (mkName "Just") . single . ListP $ map VarP xs)
(NormalB . TupE . map (uncurry AppE) . catMaybes
. zipWith (curry floatFst) hexps
$ (map VarE xs) ++ replicate rcnt emptyE)
[]
return . LamE [VarP vx] . CaseE caseExp . zipWith mkMatch [cnt+1,cnt..]
$ inits [mkName $ "r" ++ show i | i <- [0..cnt]]

-- Parsing
-------------------------------------------------------------------------------

parseIt :: String -> ParseChunks
parseIt xs = (cnt, concat [x | Left x <- results]
, [(i, x) | Right (i, x) <- results])
where (cnt, results) = parseRegex (filter (`notElem` "\r\n") xs) "" True 0

-- A pair of mutually-recursive functions, one for processing the quotation
-- and the other for the anti-quotation.

parseRegex :: String -> String -> Bool -> Int -> (Int, [ParseChunk])
parseRegex inp s b ix = case inp of
-- Disallow branch-reset capture.
('(':'?':'|':_) ->
error "Branch reset pattern (?| not allowed in fancy quasi-quoted regex."


-- Ignore non-capturing parens / handle backslash escaping
('\\':'\\' :xs) -> parseRegex xs ("\\\\" ++ s) False ix
('\\':'(' :xs) -> parseRegex xs (")\\" ++ s) False ix
('\\':')' :xs) -> parseRegex xs ("(\\" ++ s) False ix
('(':'?':':':xs) -> parseRegex xs (":?(" ++ s) False ix

-- Anti-quote for processing a capture group.
('(':'{':xs) -> mapSnd ((Left $ reverse ('(':s)) :)
$ parseHaskell xs "" (ix+1)

-- Anti-quote for processing the whole match. Only applies at the beginning.
('{':xs) -> if b then parseHaskell xs "" ix
else parseRegex xs ('{':s) False ix

-- Keep track of how many capture groups we've seen.
('(':xs) -> parseRegex xs ('(':s) False (ix+1)

-- Consume the regular expression contents.
(x:xs) -> parseRegex xs (x:s) False ix
[] -> (ix, [Left $ reverse s])

parseHaskell :: String -> String -> Int -> (Int, [ParseChunk])
parseHaskell inp s ix = case inp of
-- Escape } in the Haskell splice using a backslash.
('\\':'}':xs) -> parseHaskell xs ('}':s) ix

-- Capture accumulated antiquote, and continue parsing regex literal.
('}':xs) -> mapSnd ((Right (ix, reverse s)):)
$ parseRegex xs "" False ix

-- Consume the antiquoute contents, appending to a reverse accumulator.
(x:xs) -> parseHaskell xs (x:s) ix
[] -> error "Regular-expression Haskell splice is never terminated with a trailing }"

-- TODO: provide bytestring variant.

-- Utils
-------------------------------------------------------------------------------

pack :: String -> B.ByteString
pack = B.pack . fmap c2w

unpack :: B.ByteString -> String
unpack = fmap w2c . B.unpack

-- TODO: allow for a bundle of parameters to be passed in at the beginning
-- of the quasiquote. Would also be a juncture for informing arity /
-- extension information.

pcreOpts :: [PCREOption]
pcreOpts =
[ PCRE.extended
, PCRE.multiline ]
-- , dotall, caseless, utf8
-- , newline_any, PCRE.newline_crlf ]

pcreExecOpts :: [PCREExecOption]
pcreExecOpts = []
-- [ PCRE.exec_newline_crlf
-- , exec_newline_any, PCRE.exec_notempty
-- , PCRE.exec_notbol, PCRE.exec_noteol ]

trimFront :: String -> String
trimFront = dropWhile isSpace

single :: a -> [a]
single x = [x]

groupSortBy :: (a -> a -> Ordering) -> [a] -> [[a]]
groupSortBy f = groupBy (\x -> (==EQ) . f x) . sortBy f

-- debug :: (Show a) => a -> a
-- debug x = trace (show x) x

mapFst :: (a -> c) -> (a, b) -> (c, b)
mapFst f (x, y) = (f x, y)

mapSnd :: (b -> c) -> (a, b) -> (a, c)
mapSnd f (x, y) = (x, f y)

floatFst :: (Functor f) => (f a, b) -> f (a, b)
floatFst (x, y) = fmap (,y) x

floatSnd :: (Functor f) => (a, f b) -> f (a, b)
floatSnd (x, y) = fmap (x,) y

mapLeft :: (a -> a') -> Either a b -> Either a' b
mapLeft f = either (Left . f) Right

mapRight :: (b -> b') -> Either a b -> Either a b'
mapRight f = either Left (Right . f)

{- NVM - haskell-src does not support view patterns.
-- Modified version of Language.Haskell.Meta.Parse
-------------------------------------------------------------------------------
-- Only change is inluding "ViewPatterns" in extensions.
-- Kinda terrible that I have to do this - it would be nice if it was
-- possible to know / reflect on the compilation context in order to
-- retrieve arity and extensions information.
myDefaultExtensions :: [Extension]
myDefaultExtensions = [PostfixOperators
,QuasiQuotes
,UnicodeSyntax
,PatternSignatures
,MagicHash
,ForeignFunctionInterface
,TemplateHaskell
,RankNTypes
,MultiParamTypeClasses
,RecursiveDo
,ViewPatterns]
myDefaultParseMode :: ParseMode
myDefaultParseMode = ParseMode
{parseFilename = []
,extensions = myDefaultExtensions
,ignoreLinePragmas = False
,ignoreLanguagePragmas = False
,fixities = Just baseFixities }
-- Modified this to remove src location info
parseResultToEither :: ParseResult a -> Either String a
parseResultToEither (ParseOk a) = Right a
parseResultToEither (ParseFailed loc e) = Left e
parseExp = mapRight toExp . parseResultToEither . parseExpWithMode myDefaultParseMode
parsePat = mapRight toPat . parseResultToEither . parsePatWithMode myDefaultParseMode
-}

{-
--TODO: use something like this to cache compilde regex.
-- http://stackoverflow.com/questions/141650/how-do-you-make-a-generic-memoize-function-in-haskell
memoize :: Ord a => (a -> b) -> (a -> b)
memoize f = unsafePerformIO $ do
r <- newIORef Map.empty
return $ \ x -> unsafePerformIO $ do
m <- readIORef r
case Map.lookup x m of
Just y -> return y
Nothing -> do
let y = f x
writeIORef r (Map.insert x y m)
return y
-}
26 changes: 26 additions & 0 deletions test.hs
@@ -0,0 +1,26 @@
{-# LANGUAGE TemplateHaskell, QuasiQuotes, ViewPatterns #-}

import Text.Regex.PCRE.QQT
import Debug.Trace

-- mathr ([$reg|({ -> Just x}\d+)\s*\+\s*({id -> y}.+)|]) = x + mathr y

debug x = trace (show x) x

math ([reg|^(\d+)\s*({id}.*)|] -> (Just y, s)) x = math (debug s) y
math ([reg|^\+\s*(\d+)\s*({id}.*)$|] -> (Just y, s)) x = math (debug s) $ x + y
math ([reg|^-\s*(\d+)\s*({id}.*)$|] -> (Just y, s)) x = math (debug s) $ x - y
math ([reg|^\*\s*(\d+)\s*({id}.*)$|] -> (Just y, s)) x = math (debug s) $ x * y
math ([reg|^/\s*(\d+)\s*({id}.*)$|] -> (Just y, s)) x = math (debug s) $ x / y
math [] x = x

{-
*Main> math "2 + 2" 0
4.0
*Main> math "2 + 2 * 5" 0
20.0
*Main> math "2 + 20 * 5" 0
110.0
*Main> math "2 + 20 * 5 / 3" 0
36.666666666666664
-}

0 comments on commit 956c4ab

Please sign in to comment.