In [3]:
# Grammar text that defines a `long_running` helper and registers it as the
# script for START (taking DUMMY as input); DUMMY is the literal "AAAA".
# NOTE(review): the comment embedded in the grammar text says the helper sleeps
# for 5 seconds, but the code it contains calls time.sleep(1).
grammar_str = """
def long_running():
    # This is a long-running function that takes a while to complete.
    # It simulates a long-running process by sleeping for 5 seconds.
    import time
    time.sleep(1)
    return b"AAAA"

ctx.script("START", ["DUMMY"], lambda dummy: long_running())
ctx.literal("DUMMY", "AAAA")
"""

from morpheus.grammar import Grammar
# Build a Grammar object from the text above.
grammar = Grammar.from_string(grammar_str)
# Take the first item produced by the START seed iterator; as the cell's last
# expression it is what the notebook displays.
next(grammar.seed_iterator(nt="START"))

b'AAAA'

In [None]:
# `os` is imported here because this cell calls os.listdir(); without it the
# cell only works when a later cell has already been executed in the kernel.
import os

from morpheus.grammar import Grammar
from nautilus_python import PySerializedSeed, PyGenerator

# Load the reference XML grammar used to regenerate the RON files.
grammar = Grammar.from_file("/shellphish/grammar-composer/grammars/reference/XML.py")

# Overwrite every file currently present in /tmp/rons/ with RON bytes produced
# from the grammar; grammar_to_ron() is called once per file.
filenames = os.listdir("/tmp/rons/")
for filename in filenames:
    ron_bytes = grammar.generator.grammar_to_ron(grammar.serialize())
    with open(f"/tmp/rons/{filename}", "wb") as f:
        f.write(ron_bytes)

In [None]:
import os
# Set before the morpheus imports that follow.
# NOTE(review): presumably morpheus reads this flag at import time — confirm.
os.environ["ARTIPHISHELL_FAIL_EARLY"] = "1"

import tqdm
from glob import glob

from morpheus.composable import Composable
from morpheus.database import Database
from morpheus.derivation_tree import DerivationTree
from morpheus.grammar import Grammar
from morpheus.config import REFERENCE_GRAMMARS_FILEPATHS

# Pattern matching the serialized derivation trees to import into the database.
RONS_GLOB = "/tmp/rons/*.ron"
# Upper bound on how many of those trees are imported.
MAX_IMPORTED_TREES = 25
# The single tree that is loaded into `dt` at the end of this cell.
SAMPLE_RON_PATH = "/tmp/rons/f3fzimbn.ron"

DB = Database()

# for grammar_name, grammar_path in tqdm.tqdm(REFERENCE_GRAMMARS_FILEPATHS.items()):
#     grammar = Grammar.from_file(grammar_path)
#     DB.import_grammar(grammar, grammar_name=grammar_name)

# glob() does not guarantee any ordering (it depends on the file system), so the
# paths are sorted before slicing to make the imported subset reproducible.
for filepath in tqdm.tqdm(sorted(glob(RONS_GLOB))[:MAX_IMPORTED_TREES]):
    dt = DerivationTree.from_file(filepath)
    DB.import_derivation_tree(dt)


###############################################
# Keep both the raw bytes and the parsed tree of the sample file; this rebinds
# `dt`, which the loop above also assigned.
with open(SAMPLE_RON_PATH, "rb") as f:
    ron_bytes = f.read()

dt = DerivationTree.from_file(SAMPLE_RON_PATH)

100%|██████████| 25/25 [00:11<00:00,  2.16it/s]


In [None]:
# Copy the derivation tree held in `dt` and print the serialized form of the
# copy's grammar.
# NOTE(review): presumably copy() yields a tree independent of `dt` — confirm.
new_dt = dt.copy()
print(new_dt.grammar.serialize())
# Disabled experiment: widen the copied grammar and print it again.
# new_dt.grammar.widen()
# print(new_dt.grammar.serialize())

In [None]:
from collections import Counter
from morpheus.composable import Composition
from morpheus.config import REFERENCE_GRAMMARS_FILEPATHS
from morpheus.grammar import Grammar
from morpheus.magic import MIME_TO_NAME

# A (mime, encoding) pair must account for more than this share of a rule's
# recorded observations before a composition is inserted for it.
MIN_MIME_SHARE = 0.2


def _parent_rule_hash(tree, node):
    """Return the rule hexdigest of `node`'s parent in `tree`, or None.

    None is returned when `tree.get_parent(node)` is falsy (no parent).
    The parent is looked up once per node.
    """
    parent = tree.get_parent(node)
    return parent.rule.hexdigest if parent else None


new_compositions = []
# Distinct (rule hash, parent rule hash) pairs occurring in the tree.
rule_parent_pairs = {(node.rule.hexdigest, _parent_rule_hash(dt, node)) for node in dt.nodes}
for rule_hash, parent_hash in rule_parent_pairs:
    rows = DB.query_rules_data(rule_hash=rule_hash, parent_hash=parent_hash)
    mimes = rows["mime"]
    encodings = rows["encoding"]
    # Count every observation. Feeding a *set* of pairs to Counter (as the
    # previous version did) makes every count 1, so the share test below would
    # only reflect the number of distinct pairs, not how often each one occurs.
    mime_encoding_counter = Counter(
        (mime, encoding) for mime, encoding in zip(mimes, encodings) if mime
    )
    total = sum(mime_encoding_counter.values())
    for (mime, encoding), count in mime_encoding_counter.items():
        if count / total > MIN_MIME_SHARE:
            external_grammar_name = MIME_TO_NAME[mime]
            if external_grammar_name in REFERENCE_GRAMMARS_FILEPATHS:
                external_grammar = Grammar.from_file(REFERENCE_GRAMMARS_FILEPATHS[external_grammar_name])
                composition = DB.insert_active_composition(rule_hash, parent_hash, external_grammar, external_rule=None, external_nonterm="START", encoding=encoding)
                # Only truthy results are collected.
                if composition:
                    new_compositions.append(composition)
            # if f"{external_grammar_name}@CORPUS" in CORPUS_GRAMMARS_FILEPATHS:
            #     external_grammar = Grammar.from_file(CORPUS_GRAMMARS_FILEPATHS[f"{external_grammar_name}@CORPUS"])
            #     composition = DB.insert_active_composition(rule_hash, parent_hash, external_grammar, external_rule=None, external_nonterm="START", encoding=encoding)
            #     if composition:
            #         new_compositions.append(composition)

None
None
None
None


In [14]:
import numpy as np

# Encoding to filter on; any falsy value selects the rows whose encoding is missing.
encoding = None

# Build the boolean row mask first, then index the table with it.
if encoding:
    encoding_mask = DB.active_compositions.encoding == encoding
else:
    encoding_mask = DB.active_compositions.encoding.isna()

DB.active_compositions[encoding_mask]

Unnamed: 0,internal_rule_hash,internal_parent_hash,external_grammar_hash,external_rule_hash,external_nonterm,encoding
0,52c5093bb06d2d63fa34e00b3063a6d9696ff237e016ca...,6d871955c4be2b9d1728576265de81c96a200b8c5c8d30...,09da930dc885adb481f0490ed3cefe7d0b208e0753bdd9...,,START,
1,52c5093bb06d2d63fa34e00b3063a6d9696ff237e016ca...,6d871955c4be2b9d1728576265de81c96a200b8c5c8d30...,d7bced5541d241a3ddd926a78c1af1635b8b8eefb637d0...,,START,
2,55e43c4f11d4d6c4a1cc3eca0da0bba9798049e7cc3a09...,,09da930dc885adb481f0490ed3cefe7d0b208e0753bdd9...,,START,
3,55e43c4f11d4d6c4a1cc3eca0da0bba9798049e7cc3a09...,,d7bced5541d241a3ddd926a78c1af1635b8b8eefb637d0...,,START,
4,52c5093bb06d2d63fa34e00b3063a6d9696ff237e016ca...,6d871955c4be2b9d1728576265de81c96a200b8c5c8d30...,09da930dc885adb481f0490ed3cefe7d0b208e0753bdd9...,,START,
5,52c5093bb06d2d63fa34e00b3063a6d9696ff237e016ca...,6d871955c4be2b9d1728576265de81c96a200b8c5c8d30...,d7bced5541d241a3ddd926a78c1af1635b8b8eefb637d0...,,START,
6,55e43c4f11d4d6c4a1cc3eca0da0bba9798049e7cc3a09...,,09da930dc885adb481f0490ed3cefe7d0b208e0753bdd9...,,START,
7,55e43c4f11d4d6c4a1cc3eca0da0bba9798049e7cc3a09...,,d7bced5541d241a3ddd926a78c1af1635b8b8eefb637d0...,,START,


In [None]:
# For every tree yielded by dt.iter_compositions(), convert it to a serialized
# seed and print the result of unparsing node 0.
# NOTE(review): node 0 is presumably the root of the tree — confirm.
for new_dt in dt.iter_compositions():
    ron = new_dt.to_pyserialized_seed()
    print(ron.unparse_node_to_vec(0))
    # print(new_dt)

# Disabled alternative: iterate compositions at the grammar level instead.
# for new_grammar in dt.grammar.iter_compositions():
#     pass

INFO | morpheus | [magic_similarity] Rule XML_DECL.0 matches XML:START with confidence 1.0
INFO | morpheus | Found 1 valid configurations
INFO | morpheus | Building configuration: (0,)


INFO | morpheus | Inserted new rule XML_DECL -> XML_EXTERNAL:START.0
INFO | morpheus | Inserted new rule XML_DECL -> XML@CORPUS_EXTERNAL:START.0


b'<?xml version="1.0"?>\n<document><!--o9.?!R8,.bg0C x?m--><!--Fp,i?R r!Rf,45?--> .o.??khRR<!--54. j2n6,7,,eB-->Q! ?!0?.?.G ,<b\'yBa\'b\' A="4" A="I7Qm0OR72XFrSL"\'/><!--.,4??4,G.jnh,!.5?.-->6R?,,0  <!--g.!6g.z--><b\'F269m\'b\' A="" S="QL6ehuh9w7N3qM003wjERy58446ixgiPC445NF26bp6dqc7e7267N3qN7rvl9Oy5Mjf0Rz4531gat82InH0jXt9cdezBzN9E128b220KN70eL7p77F4u1"\'>b\'<b\\\'y1ONj2ah7\\\'b\\\' tuz9R0kTqnu3L0L66k383D34k568Gl64S3d0="" MIFu3Ncx0DxfpTd1wv9WdYlUx51eLEEwXfYWa0PF1Y9j2P9IQm4Iv7nx4Nn29E="8ormWE4b71pH7nyumsLh32w3n1CrP066oe14dlm0REsKJ0YVAhbLS66f7NrzM" B="w8sfZ8D2ZPcONB43TP7OU08683mx8LbGo666K2BX7Qi0YjX2qZ1D1mLo45CNgd3pHW0kempc5D"\\\'/><b\\\'L98u1WK0w9z430Mx22J12133\\\'b\\\'\\\'/><!----> <b\\\'XCF2RzO1j49zVY01b2J14c0VxybM6i\\\'b\\\'\\\'/><!--O!??P.qK--><b\\\'S8\\\'b\\\' A="" p1="G"\\\'/>,?3,.xKr?.<!--, 5?.,!? e?L? -->??,,e0?Uj,,!,, 96,.08?.?.!.?3 ? .? ?.<b\\\'A\\\'b\\\' Fs4DwCfOD0RSYx="xE1E6jpgL4N0ZR0beJuluf"\\\'/>60wl! uL!!3vH5.<b\\\'R4W309Rjr93780i8xqZP6Drp54R3npAoaD40a0KHQH9D0aD5VPb3vBskZr3

In [None]:
# import os
# os.environ["ARTIPHISHELL_FAIL_EARLY"] = "1"

# from morpheus.grammar import Grammar
# filepath = "/home/ruaronicola/artiphishell/libs/nautilus/grammars/reference/XML.py"
# with open(filepath, "r") as f:
#     grammar_str = f.read()
#     grammar = Grammar.from_string(grammar_str)

from nautilus_python import PySerializedSeed, PyGenerator
# ron_bytes = PyGenerator.grammar_to_ron(grammar_str)

# for _ in range(1000):
#     ron_bytes = PyGenerator.grammar_to_ron(grammar_str)
#     import tempfile
#     with tempfile.NamedTemporaryFile(prefix="/tmp/rons/", suffix=".ron", delete=False) as tmp:
#         tmp.write(ron_bytes)

# Round-trip checks on one serialized seed. Each assert carries a message so a
# failure says which invariant broke instead of a bare "AssertionError".
with open("/tmp/rons/f3fzimbn.ron", "rb") as f:
    ron_bytes = f.read()

# Invariant 1: parsing the bytes and serializing again reproduces them exactly.
ron = PySerializedSeed.from_ron_bytes(ron_bytes)
assert ron.to_ron_bytes() == ron_bytes, (
    "to_ron_bytes() of the parsed seed differs from the bytes read from disk"
)

# Invariant 2: the unparsed output is the same before and after a
# serialize/parse round trip of the seed.
unparsed_before = ron.unparse_to_vec()
ron = PySerializedSeed.from_ron_bytes(ron.to_ron_bytes())
unparsed_after = ron.unparse_to_vec()
assert unparsed_before == unparsed_after, (
    "unparse_to_vec() output changed after a serialize/parse round trip"
)

output_bytes = ron.unparse_to_vec()

AssertionError: 

In [91]:
from nautilus_python import PyRuleIDOrCustom, PyRuleID, PyTree
# Take the rule list of the seed's tree and replace entry 3 with a Custom rule
# built from PyRuleID(6) and a list of fifty 42s.
# NOTE(review): the write-back to ron.tree.rules is commented out below, so
# whether `ron` observes this change depends on whether `.rules` returns a
# copy or a live view — confirm.
new_rules = ron.tree.rules
new_rules[3] = PyRuleIDOrCustom.Custom(PyRuleID(6), [42]*50)
# ron.tree.rules = new_rules
# ron.tree.rules

In [92]:
# Attempt to build a new tree from the patched rules plus the original sizes
# and paren arrays.
# NOTE(review): the recorded run of this cell failed with
# "TypeError: PyTree.__new__() takes 0 positional arguments but 3 were given",
# so PyTree cannot be constructed positionally like this; the supported way to
# build or modify a tree needs to be confirmed against nautilus_python.
tree = PyTree(new_rules, ron.tree.sizes, ron.tree.paren)

TypeError: PyTree.__new__() takes 0 positional arguments but 3 were given

In [None]:
import glob
import os
import magic
from collections import defaultdict

from morpheus.grammar import Grammar

# libmagic handle configured to report MIME types.
MAGIC = magic.Magic(mime=True)

# grammar_str = r"""

# """

# For each draft grammar file: generate seeds from START, deduplicate them, and
# tally the detected MIME type of every distinct seed.
# Alternative file selection: glob.glob("../grammars/reference/*CORPUS*")
for grammar_path in glob.glob("../grammars/reference/draft-*"):
    if not os.path.isdir(grammar_path):
        print(f"Processing {grammar_path=}")
        grammar = Grammar.from_file(grammar_path, tree_depth=200)
        mime_counter = defaultdict(int)
        # Deduplicate once up front, then walk the resulting set.
        distinct_seeds = set(grammar.seed_iterator(nt="START", n=1000))
        for s in distinct_seeds:
            mime = MAGIC.from_buffer(s)
            mime_counter[mime] = mime_counter[mime] + 1
        print(grammar_path, len(distinct_seeds), mime_counter)
        print("-"*20)

Processing grammar_path='../grammars/reference/draft-DOCX.py'
../grammars/reference/draft-DOCX.py 1000 defaultdict(<class 'int'>, {'application/zip': 1000})
--------------------


In [22]:
# Length of one seed taken from the deduplicated seeds generated from START
# with n=100, using whichever `grammar` is currently bound in the kernel.
# NOTE(review): element [0] of a list built from a set is not a specific seed,
# so the displayed length is that of an arbitrary member.
len(list(set(grammar.seed_iterator(nt="START", n=100)))[0])

273

In [1]:
from morpheus.grammar import Grammar
# Load a simplified grammar and try to take the first composed grammar from it.
filepath = "/shellphish/grammar-composer/grammars/_simplified/grammar-guy-tika.py"
old_grammar = Grammar.from_file(filepath)

# NOTE(review): the recorded run logged "Grammar.iter_compositions() missing 1
# required positional argument: 'profile_db'" and then next() raised
# StopIteration, so this call needs a profile_db argument; what to pass is not
# visible here — confirm against morpheus.grammar.
new_grammar = next(old_grammar.iter_compositions())
print(new_grammar.serialize())

ERROR | morpheus | Generator error in iter_compositions: Grammar.iter_compositions() missing 1 required positional argument: 'profile_db'


StopIteration: 

In [2]:
from morpheus.derivation_tree import DerivationTree
from morpheus.grammar import Grammar

# Two-rule grammar: START expands to TEST, and TEST is the text "AAAA".
grammar_str = """
ctx.rule("START", "{TEST}")
ctx.rule("TEST", "AAAA")
"""

# Pipeline exercised below: grammar text -> Grammar -> RON bytes ->
# DerivationTree -> serialized seed -> unparsed bytes.
grammar = Grammar.from_string(grammar_str)
ron_bytes = grammar.generator.grammar_to_ron(grammar.serialize())

# NOTE(review): the meaning of the second argument (None) is not visible
# here — confirm against DerivationTree.from_ron_bytes.
dt = DerivationTree.from_ron_bytes(ron_bytes, None)
ron = dt.to_pyserialized_seed()
# NOTE(review): node 0 is presumably the root of the tree — confirm.
seed = ron.unparse_node_to_vec(0)
# Last expression of the cell, so the notebook displays the unparsed seed.
seed

b'AAAA'