Skip to content

Commit

Permalink
add is_identifier, ignore non-identifier globals/members (from sublan…
Browse files Browse the repository at this point in the history
…gs/splits), add splitkeys sublang example
  • Loading branch information
thisismypassport committed Aug 19, 2022
1 parent a881ed3 commit 0b46c15
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 24 deletions.
63 changes: 58 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ glob = 123
?_ENV[my_key] -- 123
```

For more advanced usecases, see the [below section](#advanced---controlling-renaming-of-identifiers).

### Preserving identifiers across the entire cart

You can instruct the minifier to preserve certain identifiers across the entire cart:
Expand Down Expand Up @@ -402,11 +400,14 @@ eval(--[[language::evally]][[
```

In the python script, provide a class that handles the language via sublanguage_main:
(This is a complete example of what sublanguages can do; you can find a simpler example [below](#example---simple-sub-language-for-table-parsing).)
```python
from pico_process import SubLanguageBase, is_ident_char
from pico_process import SubLanguageBase, is_identifier
from collections import Counter

class MySubLanguage(SubLanguageBase):
# NOTE: all members are optional.

# called to parse the sub-language from a string
# (strings consist of raw pico-8 chars ('\0' to '\xff') - not real unicode)
def __init__(self, str, on_error, **_):
Expand All @@ -419,7 +420,7 @@ class MySubLanguage(SubLanguageBase):

def is_global(self, token):
# is the token a global in our language? e.g. sin / rectfill / g_my_global
return all(is_ident_char(ch) for ch in token) and not token[:1].isdigit()
return is_identifier(token)

def is_member(self, token):
# is the token a member in our language? e.g. .my_member / .x
Expand Down Expand Up @@ -470,7 +471,10 @@ class MySubLanguage(SubLanguageBase):
usages[token[1:]] += 1
return usages

# called to rename all uses of globals and members
# for very advanced languages only, see test_input/sublang.py for details
# def get_local_usages(self, **_):

# called to rename all uses of globals/members/etc
def rename(self, globals, members, **_):
for stmt in self.stmts:
for i, token in enumerate(stmt):
Expand All @@ -488,3 +492,52 @@ def sublanguage_main(lang, **_):
if lang == "evally":
return MySubLanguage
```

### Example - simple sub-language for table parsing

Often it's useful in pico-8 to define a simple sub-language to parse something like this:

`"key1=val1,key2=val2,val3,val4"`

To:

`{key1="val1",key2="val2","val3","val4"}`

Here, to minify properly, the keys (key1/key2) should be renamed as members, while the values should be left alone.

The custom python script:
```python
from pico_process import SubLanguageBase, is_identifier
from collections import Counter

class SplitKeysSubLang(SubLanguageBase):
    """Sub-language for strings of the form "key1=val1,key2=val2,val3".

    Only the part before the first "=" of each comma-separated entry is
    treated as a member name; entries without "=" are left untouched.
    """

    def __init__(self, str, **_):
        # each entry becomes the list of its "="-separated parts
        self.data = []
        for entry in str.split(","):
            self.data.append(entry.split("="))

    def get_member_usages(self, **_):
        # count how often each key is used
        # (returned keys are ignored if they're not identifiers)
        usages = Counter()
        for parts in self.data:
            if len(parts) > 1:
                usages[parts[0]] += 1
        return usages

    def rename(self, members, **_):
        # replace each key by its new name, keeping keys that have no mapping
        for parts in self.data:
            if len(parts) <= 1:
                continue
            key = parts[0]
            parts[0] = members.get(key, key)

    def minify(self, **_):
        # rebuild the original "k=v,k=v,v" layout
        entries = ["=".join(parts) for parts in self.data]
        return ",".join(entries)

def sublanguage_main(lang, **_):
    """Return the sub-language class for ``lang``, or None if it is not "splitkeys"."""
    if lang != "splitkeys":
        return None
    return SplitKeysSubLang
```

In the code:
```lua
local table = splitkeys(--[[language::splitkeys]]"key1=val1,key2=val2,val3,val4")
?table.key1 -- "val1"
?table[1] -- "val3"
```
28 changes: 19 additions & 9 deletions pico_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,9 @@ def finish(m, path, code):
def is_ident_char(ch):
    """Return whether ``ch`` may appear in an identifier.

    Accepted are ASCII digits, ASCII letters, the underscore, and any
    character at or above code point 0x80.
    """
    if ch == '_' or ch >= chr(0x80):
        return True
    return '0' <= ch <= '9' or 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'

def is_identifier(str):
    """Return whether ``str`` is usable as an identifier.

    A string qualifies when it is non-empty, consists only of characters
    accepted by ``is_ident_char``, does not start with a digit, and is not
    one of ``keywords``.
    """
    # all() over an empty string is vacuously true, so reject "" explicitly;
    # otherwise an empty key (e.g. from "=val") would count as an identifier.
    if not str:
        return False
    # NOTE(review): str.isdigit() also accepts some non-ASCII digit characters
    # (e.g. '\xb2'), which is_ident_char treats as ordinary identifier
    # characters - confirm whether rejecting those as a first char is intended.
    if str[:1].isdigit():
        return False
    return all(is_ident_char(ch) for ch in str) and str not in keywords

def tokenize(source, ctxt=None):
text = source.text
idx = 0
Expand Down Expand Up @@ -934,17 +937,24 @@ def parse_call(expr, extra_arg=None):

return Node(NodeType.call, tokens, func=expr, args=args)

def add_const_extra_children(node):
token = node.token
def parse_const(token):
node = Node(NodeType.const, [token], token=token)

if getattr(token, "var_kind", None):
node.extra_names = token.value[1:-1].split(",")
for i, value in enumerate(node.extra_names):
subtoken = Token.synthetic(TokenType.ident, value, token)
subtoken.var_kind = token.var_kind
node.add_extra_child(parse_var(token=subtoken, member=True))
if is_identifier(value):
subtoken = Token.synthetic(TokenType.ident, value, token)
subtoken.var_kind = token.var_kind
node.add_extra_child(parse_var(token=subtoken, member=True))
else:
subtoken = Token.synthetic(TokenType.string, value, token)
node.add_extra_child(parse_const(subtoken))

if hasattr(token, "sublang"):
sublang_token = Token.synthetic(TokenType.string, "", token)
node.add_extra_child(Node(NodeType.sublang, (sublang_token,), name=token.sublang_name, lang=token.sublang))

return node

def parse_core_expr():
Expand All @@ -953,7 +963,7 @@ def parse_core_expr():
if value == None:
add_error("unexpected end of input", fail=True)
elif value in ("nil", "true", "false") or token.type in (TokenType.number, TokenType.string):
return add_const_extra_children(Node(NodeType.const, [token], token=token))
return parse_const(token)
elif value == "{":
return parse_table()
elif value == "(":
Expand Down Expand Up @@ -1464,7 +1474,7 @@ def preprocess_vars(node):

elif node.type == NodeType.sublang:
for glob in node.lang.get_defined_globals():
if glob not in custom_globals:
if glob not in custom_globals and is_identifier(glob):
custom_globals.add(glob)
vars[glob].append(root.globals[glob])

Expand Down Expand Up @@ -1676,14 +1686,14 @@ def collect_idents_pre(node):
# slight dup of compute_effective_kind logic

for name, count in node.lang.get_global_usages().items():
if name not in global_knowns:
if name not in global_knowns and is_identifier(name):
if name in all_globals:
global_knowns.add(name)
else:
global_uses[name] += count

for name, count in node.lang.get_member_usages().items():
if name not in member_knowns:
if name not in member_knowns and is_identifier(name):
member_uses[name] += count

for var, count in node.lang.get_local_usages().items():
Expand Down
2 changes: 1 addition & 1 deletion pico_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def parse_p8scii(str):

start = pos + length

def bytes_to_string_contents(bytes):
def bytes_to_string_contents(bytes): # TODO: just use format_string_literal...
data = []

esc_map = {
Expand Down
2 changes: 1 addition & 1 deletion test_compare/output.p8
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ function f.subfunc()end function f:subfunc()end
?f:subfunc()
local f="o"local d={o=123}
?d[f]
local f=split"c,a,f"local d={c=123,a=234,f=345}
local f=split"c,a,f,123"local d={c=123,a=234,f=345}
?d[f[2]]
local f="n"n=123
?_ENV[f]
Expand Down
2 changes: 1 addition & 1 deletion test_compare/output_semiob.p8
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ local c = "key"
local a = {key=123}
?a[c]

local c = split "key1,key2,key3"
local c = split "key1,key2,key3,123"
local a = {key1=123,key2=234,key3=345}
?a[c[2]]

Expand Down
2 changes: 1 addition & 1 deletion test_compare/output_tokens.p8
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ local my_key = --[[member]]"key"
local my_obj = {key=123}
?my_obj[my_key]

local my_keys = split --[[member]]"key1,key2,key3"
local my_keys = split --[[member]]"key1,key2,key3,123"
local my_obj = {key1=123,key2=234,key3=345}
?my_obj[my_keys[2]]

Expand Down
6 changes: 3 additions & 3 deletions test_compare/sublang.p8
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pico-8 cartridge // http://www.pico-8.com
version 36
__lua__
u=123function f()end f[[circfill 50 50 20 7
f=123function e()end e[[circfill 50 50 20 7
n <- pack
rawset n f u
rawset n u c]]print(n)print(n.f)f""
rawset n e f
rawset n i d]]print(n)print(n.e)e""function i()end i"d=1,f=2,0.5=13,val,f=22,if=bad"
2 changes: 1 addition & 1 deletion test_input/input.p8
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ local my_key = --[[member]]"key"
local my_obj = {key=123}
?my_obj[my_key]

local my_keys = split --[[member]]"key1,key2,key3"
local my_keys = split --[[member]]"key1,key2,key3,123"
local my_obj = {key1=123,key2=234,key3=345}
?my_obj[my_keys[2]]

Expand Down
2 changes: 2 additions & 0 deletions test_input/sublang.p8
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ eval--[[language::evally]][[
print(g_another_glob)
print(g_another_glob.some_member)
eval--[[language::empty]]""
function splitkeys() end
splitkeys--[[language::splitkeys]]"key1=1,key2=2,0.5=13,val,key2=22,if=bad"
26 changes: 24 additions & 2 deletions test_input/sublang.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pico_process import SubLanguageBase, is_ident_char, Local, Scope
from pico_process import SubLanguageBase, is_identifier, Local, Scope
from collections import Counter

class MySubLanguage(SubLanguageBase):
Expand All @@ -14,7 +14,7 @@ def __init__(self, str, on_error, **_):

def is_global(self, token):
# is the token a global in our language? e.g. sin / rectfill / g_my_global
return all(is_ident_char(ch) for ch in token) and not token[:1].isdigit()
return is_identifier(token)

def is_member(self, token):
# is the token a member in our language? e.g. .my_member / .x
Expand Down Expand Up @@ -96,9 +96,31 @@ def rename(self, globals, members, locals, **_):
def minify(self, **_):
return "\n".join(" ".join(stmt) for stmt in self.stmts)

class SplitKeysSubLang(SubLanguageBase):
    """Sub-language for strings of the form "key1=val1,key2=val2,val3".

    Only the part before the first "=" of each comma-separated entry is
    treated as a member name; entries without "=" are left untouched.
    """

    def __init__(self, str, **_):
        # each entry becomes the list of its "="-separated parts
        self.data = []
        for entry in str.split(","):
            self.data.append(entry.split("="))

    def get_member_usages(self, **_):
        # count how often each key is used
        # (returned keys are ignored if they're not identifiers)
        usages = Counter()
        for parts in self.data:
            if len(parts) > 1:
                usages[parts[0]] += 1
        return usages

    def rename(self, members, **_):
        # replace each key by its new name, keeping keys that have no mapping
        for parts in self.data:
            if len(parts) <= 1:
                continue
            key = parts[0]
            parts[0] = members.get(key, key)

    def minify(self, **_):
        # rebuild the original "k=v,k=v,v" layout
        entries = ["=".join(parts) for parts in self.data]
        return ",".join(entries)

# this is called to get a sub-language class by name
def sublanguage_main(lang, **_):
    """Return the sub-language class registered under ``lang``.

    Known names are "evally", "splitkeys" and "empty"; any other name
    yields None.
    """
    if lang == "evally":
        return MySubLanguage
    if lang == "splitkeys":
        return SplitKeysSubLang
    if lang == "empty":
        return SubLanguageBase
    return None

0 comments on commit 0b46c15

Please sign in to comment.