Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Represent PureScript strings internally as sequence of Word16 instead of Text #2488

Merged
merged 5 commits into from Jan 4, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions examples/passing/StringEdgeCases.purs
@@ -0,0 +1,9 @@
module Main where

import Prelude
import Records as Records
import Symbols as Symbols

main = do
Records.main
Symbols.main
66 changes: 66 additions & 0 deletions examples/passing/StringEdgeCases/Records.purs
@@ -0,0 +1,66 @@
module Records where

import Prelude
import Data.Generic (class Generic, toSpine, GenericSpine(..))
import Control.Monad.Eff.Console (log)
import Test.Assert (assert')

newtype AstralKeys = AstralKeys { "💡" :: Int, "💢" :: Int }
newtype LoneSurrogateKeys = LoneSurrogateKeys { "\xdf06" :: Int, "\xd834" :: Int }

derive instance genericAstralKeys :: Generic AstralKeys
derive instance genericLoneSurrogateKeys :: Generic LoneSurrogateKeys

spineOf :: forall a. Generic a => a -> Unit -> GenericSpine
spineOf x _ = toSpine x

testLoneSurrogateKeys =
let
expected = 5
actual = (_."\xd801" <<< helper) { "\xd800": 5 }
in
assert' ("lone surrogate keys: " <> show actual) (expected == actual)

where
helper :: { "\xd800" :: Int } -> { "\xd801" :: Int }
helper o =
case o."\xd800" of
x -> { "\xd801": x }

testAstralKeys =
let
expected = 5
actual = (_."💢" <<< helper) { "💡": 5 }
in
assert' ("astral keys: " <> show actual) (expected == actual)

where
helper :: { "💡" :: Int } -> { "💢" :: Int }
helper o =
case o."💡" of
x -> { "💢": x }

testGenericLoneSurrogateKeys = do
let expected = SProd "Records.LoneSurrogateKeys"
[ \_ -> SRecord [ {recLabel: "\xd834", recValue: spineOf 1}
, {recLabel: "\xdf06", recValue: spineOf 0}
]
]
actual = toSpine (LoneSurrogateKeys { "\xdf06": 0, "\xd834": 1 })
assert' ("generic lone surrogate keys: " <> show actual) (expected == actual)

testGenericAstralKeys = do
let expected = SProd "Records.AstralKeys"
[ \_ -> SRecord [ {recLabel: "💡", recValue: spineOf 0}
, {recLabel: "💢", recValue: spineOf 1}
]
]
actual = toSpine (AstralKeys { "💡": 0, "💢": 1 })
assert' ("generic astral keys: " <> show actual) (expected == actual)

main = do
testLoneSurrogateKeys
testAstralKeys
testGenericLoneSurrogateKeys
testGenericAstralKeys
log "Done"
30 changes: 30 additions & 0 deletions examples/passing/StringEdgeCases/Symbols.purs
@@ -0,0 +1,30 @@
-- This is similar to StringEscapes except we are performing the same tests
-- with Symbols (at the type level).

module Symbols where

import Prelude
import Control.Monad.Eff.Console (log)
import Type.Data.Symbol (SProxy(..), class AppendSymbol, appendSymbol, reflectSymbol)
import Test.Assert (assert')

highS :: SProxy "\xd834"
highS = SProxy

lowS :: SProxy "\xdf06"
lowS = SProxy

loneSurrogates :: Boolean
loneSurrogates = reflectSymbol (appendSymbol highS lowS) == "\x1d306"

outOfOrderSurrogates :: Boolean
outOfOrderSurrogates = reflectSymbol (appendSymbol lowS highS) == "\xdf06\xd834"

notReplacing :: Boolean
notReplacing = reflectSymbol lowS /= "\xfffd"

main = do
assert' "lone surrogates may be combined into a surrogate pair" loneSurrogates
assert' "lone surrogates may be combined out of order to remain lone surrogates" outOfOrderSurrogates
assert' "lone surrogates are not replaced with the Unicode replacement character U+FFFD" notReplacing
log "Done"
2 changes: 1 addition & 1 deletion examples/passing/StringEscapes.purs
Expand Up @@ -22,5 +22,5 @@ main = do
assert' "astral code points are represented as a UTF-16 surrogate pair" surrogatePair
assert' "lone surrogates may be combined into a surrogate pair" loneSurrogates
assert' "lone surrogates may be combined out of order to remain lone surrogates" outOfOrderSurrogates
-- assert' "lone surrogates are not replaced with the Unicode replacement character U+FFFD" notReplacing
assert' "lone surrogates are not replaced with the Unicode replacement character U+FFFD" notReplacing
log "Done"
3 changes: 3 additions & 0 deletions purescript.cabal
Expand Up @@ -141,6 +141,7 @@ library
protolude >= 0.1.6,
regex-tdfa -any,
safe >= 0.3.9 && < 0.4,
scientific >= 0.3.4.9 && < 0.4,
semigroups >= 0.16.2 && < 0.19,
sourcemap >= 0.1.6,
spdx == 0.2.*,
Expand Down Expand Up @@ -194,6 +195,7 @@ library
Language.PureScript.Errors
Language.PureScript.Errors.JSON
Language.PureScript.Kinds
Language.PureScript.Label
Language.PureScript.Linter
Language.PureScript.Linter.Exhaustive
Language.PureScript.Linter.Imports
Expand All @@ -214,6 +216,7 @@ library
Language.PureScript.Pretty.Kinds
Language.PureScript.Pretty.Types
Language.PureScript.Pretty.Values
Language.PureScript.PSString
Language.PureScript.Renamer
Language.PureScript.Sugar
Language.PureScript.Sugar.BindingGroups
Expand Down
14 changes: 8 additions & 6 deletions src/Language/PureScript/AST/Declarations.hs
Expand Up @@ -18,6 +18,8 @@ import Language.PureScript.AST.Literals
import Language.PureScript.AST.Operators
import Language.PureScript.AST.SourcePos
import Language.PureScript.Types
import Language.PureScript.PSString (PSString)
import Language.PureScript.Label (Label)
import Language.PureScript.Names
import Language.PureScript.Kinds
import Language.PureScript.TypeClassDictionaries
Expand Down Expand Up @@ -90,7 +92,7 @@ data SimpleErrorMessage
| CannotDerive (Qualified (ProperName 'ClassName)) [Type]
| InvalidNewtypeInstance (Qualified (ProperName 'ClassName)) [Type]
| CannotFindDerivingType (ProperName 'TypeName)
| DuplicateLabel Text (Maybe Expr)
| DuplicateLabel Label (Maybe Expr)
| DuplicateValueDeclaration Ident
| ArgListLengthsDiffer Ident
| OverlappingArgNames (Maybe Ident)
Expand All @@ -99,8 +101,8 @@ data SimpleErrorMessage
| ExpectedType Type Kind
| IncorrectConstructorArity (Qualified (ProperName 'ConstructorName))
| ExprDoesNotHaveType Expr Type
| PropertyIsMissing Text
| AdditionalProperty Text
| PropertyIsMissing Label
| AdditionalProperty Label
| TypeSynonymInstance
| OrphanInstance Ident (Qualified (ProperName 'ClassName)) [Type]
| InvalidNewtype (ProperName 'TypeName)
Expand Down Expand Up @@ -145,7 +147,7 @@ data ErrorMessageHint
| ErrorInModule ModuleName
| ErrorInInstance (Qualified (ProperName 'ClassName)) [Type]
| ErrorInSubsumption Type Type
| ErrorCheckingAccessor Expr Text
| ErrorCheckingAccessor Expr PSString
| ErrorCheckingType Expr Type
| ErrorCheckingKind Type
| ErrorCheckingGuard
Expand Down Expand Up @@ -573,11 +575,11 @@ data Expr
-- Anonymous arguments will be removed during desugaring and expanded
-- into a lambda that reads a property from a record.
--
| Accessor Text Expr
| Accessor PSString Expr
-- |
-- Partial record update
--
| ObjectUpdate Expr [(Text, Expr)]
| ObjectUpdate Expr [(PSString, Expr)]
-- |
-- Function introduction
--
Expand Down
6 changes: 3 additions & 3 deletions src/Language/PureScript/AST/Literals.hs
Expand Up @@ -4,7 +4,7 @@
module Language.PureScript.AST.Literals where

import Prelude.Compat
import Data.Text (Text)
import Language.PureScript.PSString (PSString)

-- |
-- Data type for literal values. Parameterised so it can be used for Exprs and
Expand All @@ -18,7 +18,7 @@ data Literal a
-- |
-- A string literal
--
| StringLiteral Text
| StringLiteral PSString
-- |
-- A character literal
--
Expand All @@ -34,5 +34,5 @@ data Literal a
-- |
-- An object literal
--
| ObjectLiteral [(Text, a)]
| ObjectLiteral [(PSString, a)]
deriving (Eq, Ord, Show, Functor)
34 changes: 20 additions & 14 deletions src/Language/PureScript/CodeGen/JS.hs
Expand Up @@ -20,6 +20,7 @@ import qualified Data.Foldable as F
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing)
import Data.Monoid ((<>))
import Data.String (fromString)
import Data.Text (Text)
import qualified Data.Text as T

Expand All @@ -34,6 +35,7 @@ import Language.PureScript.Errors (ErrorMessageHint(..), SimpleErrorMessage(..),
errorMessage, rethrowWithPosition, addHint)
import Language.PureScript.Names
import Language.PureScript.Options
import Language.PureScript.PSString (PSString, mkString, decodeString)
import Language.PureScript.Traversals (sndM)
import qualified Language.PureScript.Constants as C

Expand Down Expand Up @@ -65,8 +67,8 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
let moduleBody = header : foreign' ++ jsImports ++ concat optimized
let foreignExps = exps `intersect` (fst `map` foreigns)
let standardExps = exps \\ foreignExps
let exps' = JSObjectLiteral Nothing $ map (runIdent &&& JSVar Nothing . identToJs) standardExps
++ map (runIdent &&& foreignIdent) foreignExps
let exps' = JSObjectLiteral Nothing $ map (mkString . runIdent &&& JSVar Nothing . identToJs) standardExps
++ map (mkString . runIdent &&& foreignIdent) foreignExps
return $ moduleBody ++ [JSAssignment Nothing (JSAccessor Nothing "exports" (JSVar Nothing "module")) exps']

where
Expand Down Expand Up @@ -108,7 +110,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
importToJs :: M.Map ModuleName (Ann, ModuleName) -> ModuleName -> m JS
importToJs mnLookup mn' = do
let ((ss, _, _, _), mnSafe) = fromMaybe (internalError "Missing value in mnLookup") $ M.lookup mn' mnLookup
let moduleBody = JSApp Nothing (JSVar Nothing "require") [JSStringLiteral Nothing (T.pack (".." </> T.unpack (runModuleName mn')))]
let moduleBody = JSApp Nothing (JSVar Nothing "require") [JSStringLiteral Nothing (fromString (".." </> T.unpack (runModuleName mn')))]
withPos ss $ JSVariableIntroduction Nothing (moduleNameToJs mnSafe) (Just moduleBody)

-- |
Expand Down Expand Up @@ -176,12 +178,16 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
-- indexer is returned.
--
accessor :: Ident -> JS -> JS
accessor (Ident prop) = accessorString prop
accessor (Ident prop) = accessorString $ mkString prop
accessor (GenIdent _ _) = internalError "GenIdent in accessor"

accessorString :: Text -> JS -> JS
accessorString prop | identNeedsEscaping prop = JSIndexer Nothing (JSStringLiteral Nothing prop)
| otherwise = JSAccessor Nothing prop
accessorString :: PSString -> JS -> JS
accessorString prop =
case decodeString prop of
Just s | not (identNeedsEscaping s) ->
JSAccessor Nothing s
_ ->
JSIndexer Nothing (JSStringLiteral Nothing prop)

-- |
-- Generate code in the simplified Javascript intermediate representation for a value or expression.
Expand Down Expand Up @@ -212,7 +218,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
unAbs (Abs _ arg val) = arg : unAbs val
unAbs _ = []
assign :: Ident -> JS
assign name = JSAssignment Nothing (accessorString (runIdent name) (JSVar Nothing "this"))
assign name = JSAssignment Nothing (accessorString (mkString $ runIdent name) (JSVar Nothing "this"))
(var name)
valueToJs' (Abs _ arg val) = do
ret <- valueToJs val
Expand Down Expand Up @@ -272,15 +278,15 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
literalToValueJS (NumericLiteral (Left i)) = return $ JSNumericLiteral Nothing (Left i)
literalToValueJS (NumericLiteral (Right n)) = return $ JSNumericLiteral Nothing (Right n)
literalToValueJS (StringLiteral s) = return $ JSStringLiteral Nothing s
literalToValueJS (CharLiteral c) = return $ JSStringLiteral Nothing (T.singleton c)
literalToValueJS (CharLiteral c) = return $ JSStringLiteral Nothing (fromString [c])
literalToValueJS (BooleanLiteral b) = return $ JSBooleanLiteral Nothing b
literalToValueJS (ArrayLiteral xs) = JSArrayLiteral Nothing <$> mapM valueToJs xs
literalToValueJS (ObjectLiteral ps) = JSObjectLiteral Nothing <$> mapM (sndM valueToJs) ps

-- |
-- Shallow copy an object.
--
extendObj :: JS -> [(Text, JS)] -> m JS
extendObj :: JS -> [(PSString, JS)] -> m JS
extendObj obj sts = do
newObj <- freshName
key <- freshName
Expand Down Expand Up @@ -317,7 +323,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
qualifiedToJS f (Qualified _ a) = JSVar Nothing $ identToJs (f a)

foreignIdent :: Ident -> JS
foreignIdent ident = accessorString (runIdent ident) (JSVar Nothing "$foreign")
foreignIdent ident = accessorString (mkString $ runIdent ident) (JSVar Nothing "$foreign")

-- |
-- Generate code in the simplified Javascript intermediate representation for pattern match binders
Expand All @@ -341,7 +347,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
go _ _ _ = internalError "Invalid arguments to bindersToJs"

failedPatternError :: [Text] -> JS
failedPatternError names = JSUnary Nothing JSNew $ JSApp Nothing (JSVar Nothing "Error") [JSBinary Nothing Add (JSStringLiteral Nothing failedPatternMessage) (JSArrayLiteral Nothing $ zipWith valueError names vals)]
failedPatternError names = JSUnary Nothing JSNew $ JSApp Nothing (JSVar Nothing "Error") [JSBinary Nothing Add (JSStringLiteral Nothing $ mkString failedPatternMessage) (JSArrayLiteral Nothing $ zipWith valueError names vals)]

failedPatternMessage :: Text
failedPatternMessage = "Failed pattern match" <> maybe "" (((" at " <> runModuleName mn <> " ") <>) . displayStartEndPos) maybeSpan <> ": "
Expand Down Expand Up @@ -402,7 +408,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
literalToBinderJS varName done (NumericLiteral num) =
return [JSIfElse Nothing (JSBinary Nothing EqualTo (JSVar Nothing varName) (JSNumericLiteral Nothing num)) (JSBlock Nothing done) Nothing]
literalToBinderJS varName done (CharLiteral c) =
return [JSIfElse Nothing (JSBinary Nothing EqualTo (JSVar Nothing varName) (JSStringLiteral Nothing (T.singleton c))) (JSBlock Nothing done) Nothing]
return [JSIfElse Nothing (JSBinary Nothing EqualTo (JSVar Nothing varName) (JSStringLiteral Nothing (fromString [c]))) (JSBlock Nothing done) Nothing]
literalToBinderJS varName done (StringLiteral str) =
return [JSIfElse Nothing (JSBinary Nothing EqualTo (JSVar Nothing varName) (JSStringLiteral Nothing str)) (JSBlock Nothing done) Nothing]
literalToBinderJS varName done (BooleanLiteral True) =
Expand All @@ -411,7 +417,7 @@ moduleToJs (Module coms mn imps exps foreigns decls) foreign_ =
return [JSIfElse Nothing (JSUnary Nothing Not (JSVar Nothing varName)) (JSBlock Nothing done) Nothing]
literalToBinderJS varName done (ObjectLiteral bs) = go done bs
where
go :: [JS] -> [(Text, Binder Ann)] -> m [JS]
go :: [JS] -> [(PSString, Binder Ann)] -> m [JS]
go done' [] = return done'
go done' ((prop, binder):bs') = do
propVar <- freshName
Expand Down
8 changes: 5 additions & 3 deletions src/Language/PureScript/CodeGen/JS/AST.hs
Expand Up @@ -11,6 +11,7 @@ import Data.Text (Text)

import Language.PureScript.AST (SourceSpan(..))
import Language.PureScript.Comments
import Language.PureScript.PSString (PSString)
import Language.PureScript.Traversals

-- |
Expand Down Expand Up @@ -132,7 +133,7 @@ data JS
-- |
-- A string literal
--
| JSStringLiteral (Maybe SourceSpan) Text
| JSStringLiteral (Maybe SourceSpan) PSString
-- |
-- A boolean literal
--
Expand All @@ -156,7 +157,7 @@ data JS
-- |
-- An object literal
--
| JSObjectLiteral (Maybe SourceSpan) [(Text, JS)]
| JSObjectLiteral (Maybe SourceSpan) [(PSString, JS)]
-- |
-- An object property accessor expression
--
Expand Down Expand Up @@ -240,7 +241,8 @@ data JS
-- |
-- Commented Javascript
--
| JSComment (Maybe SourceSpan) [Comment] JS deriving (Show, Eq)
| JSComment (Maybe SourceSpan) [Comment] JS
deriving (Show, Eq)

withSourceSpan :: SourceSpan -> JS -> JS
withSourceSpan withSpan = go
Expand Down
3 changes: 2 additions & 1 deletion src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs
Expand Up @@ -11,6 +11,7 @@ import Data.Maybe (fromMaybe)

import Language.PureScript.Crash
import Language.PureScript.CodeGen.JS.AST
import Language.PureScript.PSString (mkString)

applyAll :: [a -> a] -> a -> a
applyAll = foldl' (.) id
Expand Down Expand Up @@ -73,7 +74,7 @@ isFn :: (Text, Text) -> JS -> Bool
isFn (moduleName, fnName) (JSAccessor _ x (JSVar _ y)) =
x == fnName && y == moduleName
isFn (moduleName, fnName) (JSIndexer _ (JSStringLiteral _ x) (JSVar _ y)) =
x == fnName && y == moduleName
x == mkString fnName && y == moduleName
isFn _ _ = False

isDict :: (Text, Text) -> JS -> Bool
Expand Down
2 changes: 1 addition & 1 deletion src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs
Expand Up @@ -239,7 +239,7 @@ inlineCommonOperators = applyAll $
isModFn _ _ = False

isModFnWithDict :: (Text, Text) -> JS -> Bool
isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [(JSVar _ _)]) = m == m' && op == op'
isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [JSVar _ _]) = m == m' && op == op'
isModFnWithDict _ _ = False

-- (f <<< g $ x) = f (g x)
Expand Down