# Sygus completions

In [1]:
import os
import sys
from pathlib import Path

CURRENT_DIRECTORY = Path(os.getcwd())
ROOT_DIRECTORY = (CURRENT_DIRECTORY / "..").absolute().resolve()

print(f"Current directory: {CURRENT_DIRECTORY}")
print(f"Root directory: {ROOT_DIRECTORY}")

sys.path.append(str(ROOT_DIRECTORY))

Current directory: /home/ubuntu/arga-arc/sygus
Root directory: /home/ubuntu/arga-arc


In [2]:
import typing as t
from pprint import pprint
from dataclasses import dataclass
import sexpdata as sexp
from sexpdata import Symbol
from openai import OpenAI
from config import CONFIG
from datetime import datetime
import json
import random
import math

OPENAI = OpenAI(organization=CONFIG.OPENAI_ORGANIZATION, api_key=CONFIG.OPENAI_SECRET_KEY)
client = OpenAI(organization=CONFIG.OPENAI_ORGANIZATION, api_key=CONFIG.OPENAI_SECRET_KEY)

In [3]:
BENCHMARKS_DIRECTORY = ROOT_DIRECTORY / "sygus/Probe/src/test/benchmarks"
CIRCUIT_DIRECTORY = BENCHMARKS_DIRECTORY / "circuit/test"
HACKERS_DELIGHT_DIRECTORY = BENCHMARKS_DIRECTORY / "hackers-delight"
LARGER_STRING_GRAMMAR_DIRECTORY = BENCHMARKS_DIRECTORY / "larger-grammar"
STRING_GRAMMAR_DIRECTORY = BENCHMARKS_DIRECTORY / "string"

## Generating Completions

### utils

In [4]:
def is_comment(line: str) -> bool:
    return line.strip().startswith(";")


In [5]:
def is_synth_fun(sexp) -> bool:
    ans = isinstance(sexp[0], Symbol) and sexp[0].value() == "synth-fun"
    return ans

def get_synth_fun(sexps: t.List[t.Any]) -> t.Optional[t.Any]:
    for sexp in sexps:
        if is_synth_fun(sexp):
            return sexp
    return None

def get_signature(synth_fun: t.Any) -> t.Tuple[str, t.List[t.Any], t.Any]:
    name = synth_fun[1].value()
    args = [(arg[0].value(), arg[1].value()) for arg in synth_fun[2]]
    ret_type = synth_fun[3].value()
    return (name, args, ret_type)

def is_constraint(sexp) -> bool:
    return isinstance(sexp[0], Symbol) and sexp[0].value() == "constraint"

def get_constraints(sexps: t.List[t.Any]) -> t.List[t.Any]:
    return [sexp for sexp in sexps if is_constraint(sexp)]

def constraint_to_io(sexp, num_args: int) -> t.Tuple[t.List[str], str]:
    assert is_constraint(sexp)
    eq_exp = sexp[1]
    f_exp = eq_exp[1]
    output_exp = eq_exp[2]
    f_args_exp = f_exp[1:]
    assert len(f_args_exp) == num_args
    return ([str(arg) for arg in f_args_exp], output_exp)

@dataclass
class SygusProblem:
    synth_fun: str
    signature: t.Tuple[str, t.List[t.Any], t.Any]
    examples: t.List[t.Tuple[t.List[str], str]]
    natural_language_spec: str

    @classmethod
    def from_sexps(cls, sexps: t.List[t.Any], comments: t.List[str]) -> "SygusProblem":
        constraints = get_constraints(sexps)
        examples = [constraint_to_io(constraint, num_args) for constraint in constraints]
        return cls.from_sexps_with_examples(sexps, comments, examples)
    
    @classmethod
    def from_sexps_with_examples(cls, sexps: t.List[t.Any], comments: t.List[str], examples: t.List[t.Tuple[str, str]]) -> "SygusProblem":
        synth_fun = get_synth_fun(sexps)
        assert synth_fun is not None
        synth_fun_str = sexp.dumps(synth_fun)
    
        signature = get_signature(synth_fun)
        num_args = len(signature[1])
    
        return cls(synth_fun=synth_fun_str, signature=signature, examples=examples, natural_language_spec="\n".join(comments))

    
    @property
    def num_args(self) -> int:
        return len(self.signature[1])
    
    @property
    def function_definition_prefix(self) -> str:
        return f"(define-fun {self.signature[0]} ({' '.join([f'{arg[0]} {arg[1]}' for arg in self.signature[1]])}) {self.signature[2]}"
    
    def completion_to_function_definition(self, completion: str) -> str:
        return f"{self.function_definition_prefix}\n{completion}"
    
    @property
    def user_message(self) -> str:
        EXAMPLES = ""
        for args, output in self.examples:
            EXAMPLES += f"{', '.join(args)} -> {output}\n"
        return f"""[GRAMMAR]
{self.synth_fun}

[NATURAL LANGUAGE SPECIFICATION]
{self.natural_language_spec}

[EXAMPLES]
{EXAMPLES}

[SOLUTION]
{self.function_definition_prefix}"""

In [6]:
SYSTEM_PROMPT = """You are a coding assistant. Be precise and terse.
You will be given a SyGuS grammar, a natural language specification, and a set of input-output examples.
Your task is to write a program that is correct according to the grammar, specification, and examples."""

In [7]:
def sample_gpt_solutions(problem: SygusProblem, n: int = 10) -> t.Tuple[t.List[str], int]:
    start_time = datetime.now()
    response = OPENAI.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": problem.user_message},
        ],
        n=n,
        temperature=0.5,
    )
    end_time = datetime.now()
    time_diff_ms = (end_time - start_time).microseconds / 1000
    return [problem.completion_to_function_definition(choice.message.content) for choice in response.choices], time_diff_ms

### testing

In [5]:
# for each file in the CIRCUIT_DIRECTORY, print the filename
for file in STRING_GRAMMAR_DIRECTORY.iterdir():
    print(file)

/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/get-first-name-from-name-with-comma.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/strip-html-from-text-or-numbers.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/count-total-words-in-a-cell.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/stackoverflow8.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-6-long.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-10.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-6.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-7-short.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-7.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/univ_6_short.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/stackoverflow9.sl
/home/ubuntu/arga-arc/sygus/Probe/src/test/benchmarks/string/phone-9-long-repeat.sl
/home/ubuntu/arga-arc

In [11]:
TEST_GRAMMAR = """(synth-fun f ((x (BitVec 64))) (BitVec 64)
    ((Start (BitVec 64) ((bvnot Start)
						 (bvxor Start Start)
						 (bvand Start Start)
						 (bvor Start Start)
						 (bvneg Start)
						 (bvadd Start Start)
						 (bvmul Start Start)
						 (bvudiv Start Start)
						 (bvurem Start Start)
						 (bvlshr Start Start)
						 (bvashr Start Start)
						 (bvshl Start Start)
						 (bvsdiv Start Start)
						 (bvsrem Start Start)
						 (bvsub Start Start)
                         x
						 #x0000000000000000
                         #x0000000000000001
                         #x0000000000000002
                         #x000000000000001f
                         #xffffffffffffffff
                         (ite StartBool Start Start)))

                         (StartBool Bool
                         ((= Start Start)
                         ))))"""

TEST_NAT_LANG_SPEC = "; Next higher unsigned number with the same number of 1 bits."

TEST_EXAMPLES = [
    ("#x0000000000000001", "#x0000000000000002"),
    ("#x0000000000000003", "#x0000000000000005"),
]

USER_MESSAGE = f"""[GRAMMAR]
{TEST_GRAMMAR}

[NATURAL LANGUAGE SPECIFICATION]
{TEST_NAT_LANG_SPEC}

[EXAMPLES]
{[f"{x} -> {y}" for x, y in TEST_EXAMPLES]}

[SOLUTION]
(define-fun ans ((x (BitVec 64))) (BitVec 64)"""

In [12]:
response = client.chat.completions.create(
  model="gpt-4",
  messages=[
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": USER_MESSAGE},
  ],
  n=10,
  temperature=0.5,
)
pprint([choice.message.content for choice in response.choices])

['(bvadd x #x0000000000000001))',
 '(let ((y (bvor x (bvsub x #x0000000000000001))))\n'
 '  (let ((z (bvand y (bvnot x))))\n'
 '    (bvor (bvshl z (bvlshr (bvadd z #xffffffffffffffff) #x000000000000001f)) '
 'x))))',
 '(let ((y (bvadd x #x0000000000000001)))\n'
 '  (let ((z (bvxor x y)))\n'
 '    (let ((a (bvadd z #x0000000000000001)))\n'
 '      (let ((b (bvlshr a #x0000000000000002)))\n'
 '        (let ((c (bvand b z)))\n'
 '          (let ((d (bvsub y c)))\n'
 '            d)))))))',
 '(bvadd x #x0000000000000001))',
 '(let ((t (bvand x (bvsub x #x0000000000000001))))\n'
 '  (bvadd (bvlshr x (bvadd (bvurem t (bvneg t)) #x0000000000000001)) '
 '#x0000000000000001)))',
 '(bvadd x #x0000000000000001))',
 '(let ((t (bvadd x #x0000000000000001)))\n'
 '  (let ((u (bvxor x t)))\n'
 '    (let ((v (bvadd u #x0000000000000001)))\n'
 '      (let ((w (bvshl t (bvurem u v))))\n'
 '        (bvand w (bvlshr v (bvurem u v))))))))',
 '(bvadd x #x0000000000000001))',
 '(let ((y (bvand x (bvsub x #x00

### Larger String Grammar

In [16]:
LARGER_STRING_GRAMMAR_PROBLEMS = {}

LARGER_STRING_GRAMMAR_COMMENTS = {}

def is_comment(line: str) -> bool:
    return line.strip().startswith(";")


# for each file in the CIRCUIT_DIRECTORY, print the filename
for file in LARGER_STRING_GRAMMAR_DIRECTORY.iterdir():
    print(file.name)
    with open(file, "r") as f:
        contents = file.read_text()
        LARGER_STRING_GRAMMAR_PROBLEMS[file.name] = sexp.loads(contents)
        LARGER_STRING_GRAMMAR_COMMENTS[file.name] = [
            line for line in contents.split("\n") if is_comment(line)
        ]

exceljet2modified.sl
31753108modified.sl
find-nth-occurrence-of-charactermodified.sl
clean-and-reformat-telephone-numbersmodified.sl
17212077modified.sl
11604909modified.sl
stackoverflow4modified.sl
remove-text-by-positionmodified.sl
44789427modified.sl
stackoverflow11modified.sl
stackoverflow3modified.sl
initials-longmodified.sl
phone-7modified.sl
43606446modified.sl
extract-nth-word-from-text-stringmodified.sl
remove-leading-and-trailing-spaces-from-textmodified.sl
phone-5-long-repeatmodified.sl
count-total-words-in-a-cellmodified.sl
stackoverflow8modified.sl
strip-numeric-characters-from-cellmodified.sl
phone-9modified.sl
initialsmodified.sl
phone-10-long-repeatmodified.sl
39060015modified.sl
phone-7-long-repeatmodified.sl
stackoverflow6modified.sl
strip-non-numeric-charactersmodified.sl
stackoverflow1modified.sl
get-last-wordmodified.sl
phone-5modified.sl
phone-6-long-repeatmodified.sl
get-last-line-in-cellmodified.sl
phone-9-shortmodified.sl
38871714modified.sl
phone-5-shortmodifi

In [9]:
pprint(LARGER_STRING_GRAMMAR_PROBLEMS["exceljet2modified.sl"][1])
pprint(LARGER_STRING_GRAMMAR_COMMENTS["exceljet2modified.sl"])

[Symbol('synth-fun'),
 Symbol('f'),
 [[Symbol('_arg_0'), Symbol('String')]],
 Symbol('String'),
 [[Symbol('Start'), Symbol('String'), [Symbol('ntString')]],
  [Symbol('ntString'),
   Symbol('String'),
   [Symbol('_arg_0'),
    '',
    ' ',
    'BRD',
    'DRS',
    'LDS',
    'Branding',
    'Direct Response',
    'Leads',
    '=',
    '/',
    'in',
    '_',
    '9',
    '.',
    'microsoft',
    'windows',
    'apple',
    'mac',
    '-',
    '1',
    '2',
    '3',
    '4',
    '5',
    '6',
    '7',
    '8',
    '0',
    ',',
    '<',
    '>',
    '/n',
    '%',
    'b',
    'apple',
    'bananas',
    'strawberries',
    'oranges',
    'LLC',
    'Inc',
    'Corporation',
    'Enterprises',
    'Company',
    '(',
    ')',
    '+',
    'name',
    ',',
    [Symbol('str.++'), Symbol('ntString'), Symbol('ntString')],
    [Symbol('str.replace'),
     Symbol('ntString'),
     Symbol('ntString'),
     Symbol('ntString')],
    [Symbol('str.at'), Symbol('ntString'), Symbol('ntInt')],
    

In [36]:
LARGER_STRING_GRAMMAR_SYGUS = {}
for filename, sexps in LARGER_STRING_GRAMMAR_PROBLEMS.items():
    pprint(filename)
    LARGER_STRING_GRAMMAR_SYGUS[filename] = SygusProblem.from_sexps(sexps, LARGER_STRING_GRAMMAR_COMMENTS[filename])

'exceljet2modified.sl'
'31753108modified.sl'
'find-nth-occurrence-of-charactermodified.sl'
'clean-and-reformat-telephone-numbersmodified.sl'
'17212077modified.sl'
'11604909modified.sl'
'stackoverflow4modified.sl'
'remove-text-by-positionmodified.sl'
'44789427modified.sl'
'stackoverflow11modified.sl'
'stackoverflow3modified.sl'
'initials-longmodified.sl'
'phone-7modified.sl'
'43606446modified.sl'
'extract-nth-word-from-text-stringmodified.sl'
'remove-leading-and-trailing-spaces-from-textmodified.sl'
'phone-5-long-repeatmodified.sl'
'count-total-words-in-a-cellmodified.sl'
'stackoverflow8modified.sl'
'strip-numeric-characters-from-cellmodified.sl'
'phone-9modified.sl'
'initialsmodified.sl'
'phone-10-long-repeatmodified.sl'
'39060015modified.sl'
'phone-7-long-repeatmodified.sl'
'stackoverflow6modified.sl'
'strip-non-numeric-charactersmodified.sl'
'stackoverflow1modified.sl'
'get-last-wordmodified.sl'
'phone-5modified.sl'
'phone-6-long-repeatmodified.sl'
'get-last-line-in-cellmodified.sl'


In [37]:
pprint(LARGER_STRING_GRAMMAR_SYGUS["exceljet2modified.sl"].user_message)

('[GRAMMAR]\n'
 '(synth-fun f ((_arg_0 String)) String ((Start String (ntString)) (ntString '
 'String (_arg_0 "" " " "BRD" "DRS" "LDS" "Branding" "Direct Response" "Leads" '
 '"=" "/" "in" "_" "9" "." "microsoft" "windows" "apple" "mac" "-" "1" "2" "3" '
 '"4" "5" "6" "7" "8" "0" "," "<" ">" "/n" "%" "b" "apple" "bananas" '
 '"strawberries" "oranges" "LLC" "Inc" "Corporation" "Enterprises" "Company" '
 '"(" ")" "+" "name" "," (str.++ ntString ntString) (str.replace ntString '
 'ntString ntString) (str.at ntString ntInt) (int.to.str ntInt) (ite ntBool '
 'ntString ntString) (str.substr ntString ntInt ntInt))) (ntInt Int (-1 1 2 3 '
 '4 5 6 7 8 9 0 1 0 -1 (+ ntInt ntInt) (- ntInt ntInt) (str.len ntString) '
 '(str.to.int ntString) (ite ntBool ntInt ntInt) (str.indexof ntString '
 'ntString ntInt))) (ntBool Bool (true false (= ntInt ntInt) (str.prefixof '
 'ntString ntString) (str.suffixof ntString ntString) (str.contains ntString '
 'ntString)))))\n'
 '\n'
 '[NATURAL LANGUAGE SPECIFICAT

In [38]:
for filename, problem in LARGER_STRING_GRAMMAR_SYGUS.items():
    pprint(filename)
    pprint(problem.signature)
    pprint(problem.examples)

'exceljet2modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['www.domain.com'], 'com'),
 (['mail.net'], 'net'),
 (['www.amaon.co.uk'], 'uk')]
'31753108modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['Tire Pressure ABC123873 Monitor'], 'ABC123873'),
 ([' Oil Life ABC849999999021 gauge'], 'ABC849999999021'),
 ([' Air conditioner GHF211 maintenance'], 'GHF211')]
'find-nth-occurrence-of-charactermodified.sl'
('f', [('_arg_0', 'String'), ('_arg_1', 'Int')], 'Int')
[(['replies to _aya, _tasisuke, and _chan', '1'], 12),
 (['replies to _aya, _tasisuke, and _chan', '2'], 18),
 (['replies to _aya, _tasisuke, and _chan', '3'], 33)]
'clean-and-reformat-telephone-numbersmodified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['801-456-8765'], '8014568765'),
 (['<978> 654-0299'], '9786540299'),
 (['978.654.0299'], '9786540299')]
'17212077modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['01/15/2013'], '01/2013'),
 (['03/07/2011'], '03/2011'),
 (['05/09/2009'], '05/2009')]
'11604909

In [40]:
sample_gpt_solutions(LARGER_STRING_GRAMMAR_SYGUS["exceljet2modified.sl"])

(['(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))) (- (str.len _arg_0) (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))) (- (str.len _arg_0) (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) St

In [41]:
LARGER_STRING_GRAMMAR_OUTPUT = {}

for filename, problem in LARGER_STRING_GRAMMAR_SYGUS.items():
    print(filename)
    solutions, time_diff_ms = sample_gpt_solutions(problem)
    LARGER_STRING_GRAMMAR_OUTPUT[filename] = {
        "solutions": solutions,
        "time_ms": time_diff_ms
    }

exceljet2modified.sl


31753108modified.sl
find-nth-occurrence-of-charactermodified.sl
clean-and-reformat-telephone-numbersmodified.sl
17212077modified.sl
11604909modified.sl
stackoverflow4modified.sl
remove-text-by-positionmodified.sl
44789427modified.sl
stackoverflow11modified.sl
stackoverflow3modified.sl
initials-longmodified.sl
phone-7modified.sl
43606446modified.sl
extract-nth-word-from-text-stringmodified.sl
remove-leading-and-trailing-spaces-from-textmodified.sl
phone-5-long-repeatmodified.sl
count-total-words-in-a-cellmodified.sl
stackoverflow8modified.sl
strip-numeric-characters-from-cellmodified.sl
phone-9modified.sl
initialsmodified.sl
phone-10-long-repeatmodified.sl
39060015modified.sl
phone-7-long-repeatmodified.sl
stackoverflow6modified.sl
strip-non-numeric-charactersmodified.sl
stackoverflow1modified.sl
get-last-wordmodified.sl
phone-5modified.sl
phone-6-long-repeatmodified.sl
get-last-line-in-cellmodified.sl
phone-9-shortmodified.sl
38871714modified.sl
phone-5-shortmodified.sl
initials_smallm

In [44]:
LARGER_STRING_GRAMMAR_OUTPUT_FILE = ROOT_DIRECTORY / "sygus/larger-string-grammar-completions.json"
LARGER_STRING_GRAMMAR_OUTPUT_FILE.write_text(json.dumps(LARGER_STRING_GRAMMAR_OUTPUT, indent=2))

112525

### String Grammar

In [8]:
STRING_GRAMMAR_PROBLEMS = {}
STRING_GRAMMAR_COMMENTS = {}

for file in STRING_GRAMMAR_DIRECTORY.iterdir():
    print(file.name)
    with open(file, "r") as f:
        contents = file.read_text()
        STRING_GRAMMAR_PROBLEMS[file.name] = sexp.loads(contents)
        STRING_GRAMMAR_COMMENTS[file.name] = [
            line for line in contents.split("\n") if is_comment(line)
        ]

get-first-name-from-name-with-comma.sl
strip-html-from-text-or-numbers.sl
count-total-words-in-a-cell.sl
stackoverflow8.sl
phone-6-long.sl
phone-10.sl
phone-6.sl
phone-7-short.sl
phone-7.sl
univ_6_short.sl
stackoverflow9.sl
phone-9-long-repeat.sl
get-last-word.sl
phone-10_short.sl
17212077.sl
phone-5.sl
39060015.sl
phone-7-long-repeat.sl
38871714.sl
43120683.sl
stackoverflow11.sl
split-text-string-at-specific-character.sl
get-domain-name-from-url.sl
get-last-name-from-name-with-comma.sl
initials.sl
univ_5-long-repeat.sl
stackoverflow10.sl
phone-9-short.sl
phone-6-short.sl
43606446.sl
initials-long.sl
phone-5-long-repeat.sl
phone-7-long.sl
univ_6-long.sl
33619752.sl
extract-text-between-parentheses.sl
remove-leading-and-trailing-spaces-from-text.sl
strip-non-numeric-characters.sl
get-middle-name-from-full-name.sl
initials_small.sl
stackoverflow5.sl
extract-word-that-begins-with-specific-character.sl
35744094.sl
get-last-line-in-cell.sl
31753108.sl
exceljet4.sl
44789427.sl
stackoverflow4

In [10]:
pprint(STRING_GRAMMAR_PROBLEMS["get-first-name-from-name-with-comma.sl"][1])
pprint(STRING_GRAMMAR_COMMENTS["get-first-name-from-name-with-comma.sl"])

[Symbol('synth-fun'),
 Symbol('f'),
 [[Symbol('_arg_0'), Symbol('String')], [Symbol('_arg_1'), Symbol('Int')]],
 Symbol('String'),
 [[Symbol('Start'), Symbol('String'), [Symbol('ntString')]],
  [Symbol('ntString'),
   Symbol('String'),
   [Symbol('_arg_0'),
    '',
    ' ',
    ',',
    [Symbol('str.++'), Symbol('ntString'), Symbol('ntString')],
    [Symbol('str.replace'),
     Symbol('ntString'),
     Symbol('ntString'),
     Symbol('ntString')],
    [Symbol('str.at'), Symbol('ntString'), Symbol('ntInt')],
    [Symbol('int.to.str'), Symbol('ntInt')],
    [Symbol('ite'), Symbol('ntBool'), Symbol('ntString'), Symbol('ntString')],
    [Symbol('str.substr'),
     Symbol('ntString'),
     Symbol('ntInt'),
     Symbol('ntInt')]]],
  [Symbol('ntInt'),
   Symbol('Int'),
   [Symbol('_arg_1'),
    1,
    0,
    -1,
    1,
    2,
    [Symbol('+'), Symbol('ntInt'), Symbol('ntInt')],
    [Symbol('-'), Symbol('ntInt'), Symbol('ntInt')],
    [Symbol('str.len'), Symbol('ntString')],
    [Symbol('str.

In [12]:
STRING_GRAMMAR_SYGUS = {}
for filename, sexps in STRING_GRAMMAR_PROBLEMS.items():
    pprint(filename)
    STRING_GRAMMAR_SYGUS[filename] = SygusProblem.from_sexps(sexps, STRING_GRAMMAR_COMMENTS[filename])

'get-first-name-from-name-with-comma.sl'
'strip-html-from-text-or-numbers.sl'
'count-total-words-in-a-cell.sl'
'stackoverflow8.sl'
'phone-6-long.sl'
'phone-10.sl'
'phone-6.sl'
'phone-7-short.sl'
'phone-7.sl'
'univ_6_short.sl'
'stackoverflow9.sl'
'phone-9-long-repeat.sl'
'get-last-word.sl'
'phone-10_short.sl'
'17212077.sl'
'phone-5.sl'
'39060015.sl'
'phone-7-long-repeat.sl'
'38871714.sl'
'43120683.sl'
'stackoverflow11.sl'
'split-text-string-at-specific-character.sl'
'get-domain-name-from-url.sl'
'get-last-name-from-name-with-comma.sl'
'initials.sl'
'univ_5-long-repeat.sl'
'stackoverflow10.sl'
'phone-9-short.sl'
'phone-6-short.sl'
'43606446.sl'
'initials-long.sl'
'phone-5-long-repeat.sl'
'phone-7-long.sl'
'univ_6-long.sl'
'33619752.sl'
'extract-text-between-parentheses.sl'
'remove-leading-and-trailing-spaces-from-text.sl'
'strip-non-numeric-characters.sl'
'get-middle-name-from-full-name.sl'
'initials_small.sl'
'stackoverflow5.sl'
'extract-word-that-begins-with-specific-character.sl'
'357

In [13]:
pprint(STRING_GRAMMAR_SYGUS["get-first-name-from-name-with-comma.sl"].user_message)

('[GRAMMAR]\n'
 '(synth-fun f ((_arg_0 String) (_arg_1 Int)) String ((Start String '
 '(ntString)) (ntString String (_arg_0 "" " " "," (str.++ ntString ntString) '
 '(str.replace ntString ntString ntString) (str.at ntString ntInt) (int.to.str '
 'ntInt) (ite ntBool ntString ntString) (str.substr ntString ntInt ntInt))) '
 '(ntInt Int (_arg_1 1 0 -1 1 2 (+ ntInt ntInt) (- ntInt ntInt) (str.len '
 'ntString) (str.to.int ntString) (ite ntBool ntInt ntInt) (str.indexof '
 'ntString ntString ntInt))) (ntBool Bool (true false (= ntInt ntInt) '
 '(str.prefixof ntString ntString) (str.suffixof ntString ntString) '
 '(str.contains ntString ntString)))))\n'
 '\n'
 '[NATURAL LANGUAGE SPECIFICATION]\n'
 '; https=//exceljet.net/formula/get-first-name-from-name-with-comma\n'
 '\n'
 '[EXAMPLES]\n'
 'Chang,Amy, 1 -> Amy\n'
 'Chang,Amy, 2 -> Chang\n'
 'smith,bobby, 2 -> smith\n'
 'smith,bobby, 1 -> bobby\n'
 '\n'
 '\n'
 '[SOLUTION]\n'
 '(define-fun f (_arg_0 String _arg_1 Int) String')


In [14]:
for filename, problem in STRING_GRAMMAR_SYGUS.items():
    pprint(filename)
    pprint(problem.signature)
    pprint(problem.examples)

'get-first-name-from-name-with-comma.sl'
('f', [('_arg_0', 'String'), ('_arg_1', 'Int')], 'String')
[(['Chang,Amy', '1'], 'Amy'),
 (['Chang,Amy', '2'], 'Chang'),
 (['smith,bobby', '2'], 'smith'),
 (['smith,bobby', '1'], 'bobby')]
'strip-html-from-text-or-numbers.sl'
('f', [('_arg_0', 'String')], 'String')
[(['<b>0.66<b>'], '0.66'),
 (['<b>0.409<b>'], '0.409'),
 (['<b>0.7268<b>'], '0.7268')]
'count-total-words-in-a-cell.sl'
('f', [('_arg_0', 'String')], 'Int')
[(['humpty dumpty'], 2),
 (['humpty dumpty sat on a wall,'], 6),
 (['couldnt put humpty together again.'], 5)]
'stackoverflow8.sl'
('f', [('_arg_0', 'String')], 'String')
[(['home/Excel/Sheet1.xls'], 'Sheet1.xls'),
 (['home/user/Sheet1.xls'], 'Sheet1.xls')]
'phone-6-long.sl'
('f', [('name', 'String')], 'String')
[(['+106 769-858-438'], '769'),
 (['+83 973-757-831'], '973'),
 (['+62 647-787-775'], '647'),
 (['+172 027-507-632'], '027'),
 (['+72 001-050-856'], '001'),
 (['+95 310-537-401'], '310'),
 (['+6 775-969-238'], '775'),
 (['

In [19]:
sample_gpt_solutions(STRING_GRAMMAR_SYGUS["get-first-name-from-name-with-comma.sl"])

(['(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((s (str.replace _arg_0 " " "")))\n  (ite (= _arg_1 1)\n    (str.substr s (+ (str.indexof s "," 0) 1) (- (str.len s) (str.indexof s "," 0) 1))\n    (str.substr s 0 (str.indexof s "," 0))\n  )\n))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split _arg_0 ",") (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) S

In [20]:
STRING_GRAMMAR_OUTPUT = {}

for filename, problem in STRING_GRAMMAR_SYGUS.items():
    print(filename)
    solutions, time_diff_ms = sample_gpt_solutions(problem)
    STRING_GRAMMAR_OUTPUT[filename] = {
        "solutions": solutions,
        "time_ms": time_diff_ms
    }

get-first-name-from-name-with-comma.sl
strip-html-from-text-or-numbers.sl
count-total-words-in-a-cell.sl
stackoverflow8.sl
phone-6-long.sl
phone-10.sl
phone-6.sl
phone-7-short.sl
phone-7.sl
univ_6_short.sl
stackoverflow9.sl
phone-9-long-repeat.sl
get-last-word.sl
phone-10_short.sl
17212077.sl
phone-5.sl
39060015.sl
phone-7-long-repeat.sl
38871714.sl
43120683.sl
stackoverflow11.sl
split-text-string-at-specific-character.sl
get-domain-name-from-url.sl
get-last-name-from-name-with-comma.sl
initials.sl
univ_5-long-repeat.sl
stackoverflow10.sl
phone-9-short.sl
phone-6-short.sl
43606446.sl
initials-long.sl
phone-5-long-repeat.sl
phone-7-long.sl
univ_6-long.sl
33619752.sl
extract-text-between-parentheses.sl
remove-leading-and-trailing-spaces-from-text.sl
strip-non-numeric-characters.sl
get-middle-name-from-full-name.sl
initials_small.sl
stackoverflow5.sl
extract-word-that-begins-with-specific-character.sl
35744094.sl
get-last-line-in-cell.sl
31753108.sl
exceljet4.sl
44789427.sl
stackoverflow4

In [21]:
STRING_GRAMMAR_OUTPUT_FILE = ROOT_DIRECTORY / "sygus/string-grammar-completions.json"
STRING_GRAMMAR_OUTPUT_FILE.write_text(json.dumps(STRING_GRAMMAR_OUTPUT, indent=2))

119142

### Circuit

In [8]:
CIRCUIT_EXAMPLES_FILE = ROOT_DIRECTORY / "sygus/io-results-circuit.json"
CIRCUIT_EXAMPLES_JSON = json.loads(CIRCUIT_EXAMPLES_FILE.read_text())
pprint(CIRCUIT_EXAMPLES["CrCy_10-sbox2-D5-sIn79.sl"])

NameError: name 'CIRCUIT_EXAMPLES' is not defined

In [9]:
CIRCUIT_EXAMPLES = {}

for filename, examples in CIRCUIT_EXAMPLES_JSON.items():
    CIRCUIT_EXAMPLES[filename] = [
        (example["inputs"], example["output"]) for example in examples
    ]

pprint(CIRCUIT_EXAMPLES["CrCy_10-sbox2-D5-sIn79.sl"])

[(['true', 'true', 'true', 'true', 'true', 'true'], 'true'),
 (['false', 'true', 'false', 'false', 'true', 'false'], 'false'),
 (['false', 'true', 'true', 'true', 'true', 'false'], 'true'),
 (['true', 'false', 'true', 'true', 'true', 'true'], 'false'),
 (['false', 'false', 'false', 'true', 'true', 'false'], 'false'),
 (['true', 'false', 'true', 'false', 'false', 'false'], 'false'),
 (['false', 'true', 'true', 'false', 'true', 'true'], 'true'),
 (['true', 'true', 'false', 'false', 'false', 'true'], 'false'),
 (['true', 'true', 'false', 'true', 'false', 'true'], 'false'),
 (['false', 'true', 'false', 'true', 'false', 'false'], 'false'),
 (['true', 'true', 'true', 'true', 'true', 'true'], 'true'),
 (['true', 'true', 'true', 'true', 'true', 'false'], 'true'),
 (['true', 'true', 'true', 'true', 'false', 'true'], 'false'),
 (['true', 'true', 'true', 'true', 'false', 'false'], 'false'),
 (['true', 'true', 'true', 'false', 'true', 'true'], 'false'),
 (['true', 'true', 'true', 'false', 'true', 

In [10]:
CIRCUIT_PROBLEMS = {}
CIRCUIT_COMMENTS = {}

for file in CIRCUIT_DIRECTORY.iterdir():
    print(file.name)
    with open(file, "r") as f:
        contents = file.read_text()
        CIRCUIT_PROBLEMS[file.name] = sexp.loads(contents)
        CIRCUIT_COMMENTS[file.name] = [
            line for line in contents.split("\n") if is_comment(line)
        ]

CrCy_10-sbox2-D7-sIn1.sl
CrCy_6-P10-D7-sIn.sl
CrCy_6-P10-D7-sIn3.sl
CrCy_10-sbox2-D5-sIn104.sl
CrCy_9-pprmAll-D7-sIn1.sl
CrCy_6-P10-D5-sIn.sl
CrCy_10-sbox2-D5-sIn88.sl
CrCy_8-P12-D7-sIn5.sl
CrCy_10-sbox2-D5-sIn89.sl
CrCy_2-P6_2-P6.sl
CrCy_6-P10-D7-sIn5.sl
CrCy_8-P12-D7-sIn1.sl
CrCy_10-sbox2-D5-sIn79.sl
CrCy_10-sbox2-D5-sIn78.sl
CrCy_10-sbox2-D5-sIn91.sl
CrCy_6-P10-D9-sIn5.sl
CrCy_8-P12-D5-sIn1.sl
CrCy_8-P12-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn90.sl
CrCy_10-sbox2-D5-sIn92.sl
CrCy_10-sbox2-D5-sIn15.sl
CrCy_8-P12-D5-sIn3.sl
CrCy_10-sbox2-D5-sIn14.sl
CrCy_10-sbox2-D5-sIn80.sl
CrCy_10-sbox2-D5-sIn77.sl
CrCy_6-P10-D9-sIn3.sl
CrCy_6-P10-D5-sIn3.sl
CrCy_9-pprmAll-D5-sIn1.sl
CrCy_9-pprmAll-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn76.sl
CrCy_6-P10-D9-sIn.sl


In [11]:
pprint(CIRCUIT_PROBLEMS["CrCy_10-sbox2-D5-sIn79.sl"][2])
pprint(CIRCUIT_COMMENTS["CrCy_10-sbox2-D5-sIn79.sl"])

[Symbol('synth-fun'),
 Symbol('skel'),
 [[Symbol('LN70'), Symbol('Bool')],
  [Symbol('LN73'), Symbol('Bool')],
  [Symbol('LN77'), Symbol('Bool')],
  [Symbol('LN236'), Symbol('Bool')],
  [Symbol('LN252'), Symbol('Bool')],
  [Symbol('LN253'), Symbol('Bool')]],
 Symbol('Bool'),
 [[Symbol('Start'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth1'), Symbol('depth1')],
    [Symbol('not'), Symbol('depth1')],
    [Symbol('or'), Symbol('depth1'), Symbol('depth1')],
    [Symbol('xor'), Symbol('depth1'), Symbol('depth1')]]],
  [Symbol('depth1'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth2'), Symbol('depth2')],
    [Symbol('not'), Symbol('depth2')],
    [Symbol('or'), Symbol('depth2'), Symbol('depth2')],
    [Symbol('xor'), Symbol('depth2'), Symbol('depth2')]]],
  [Symbol('depth2'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth3'), Symbol('depth3')],
    [Symbol('not'), Symbol('depth3')],
    [Symbol('or'), Symbol('depth3'), Symbol('depth3')],
    [Symbol('xor'), Symbol('dep

In [12]:
CIRCUIT_GRAMMAR_SYGUS = {}
for filename, sexps in CIRCUIT_PROBLEMS.items():
    pprint(filename)
    CIRCUIT_GRAMMAR_SYGUS[filename] = SygusProblem.from_sexps_with_examples(sexps, CIRCUIT_COMMENTS[filename], random.sample(CIRCUIT_EXAMPLES[filename], min(len(CIRCUIT_EXAMPLES[filename]), 10)))

'CrCy_10-sbox2-D7-sIn1.sl'
'CrCy_6-P10-D7-sIn.sl'
'CrCy_6-P10-D7-sIn3.sl'
'CrCy_10-sbox2-D5-sIn104.sl'
'CrCy_9-pprmAll-D7-sIn1.sl'
'CrCy_6-P10-D5-sIn.sl'
'CrCy_10-sbox2-D5-sIn88.sl'
'CrCy_8-P12-D7-sIn5.sl'
'CrCy_10-sbox2-D5-sIn89.sl'
'CrCy_2-P6_2-P6.sl'
'CrCy_6-P10-D7-sIn5.sl'
'CrCy_8-P12-D7-sIn1.sl'
'CrCy_10-sbox2-D5-sIn79.sl'
'CrCy_10-sbox2-D5-sIn78.sl'
'CrCy_10-sbox2-D5-sIn91.sl'
'CrCy_6-P10-D9-sIn5.sl'
'CrCy_8-P12-D5-sIn1.sl'
'CrCy_8-P12-D9-sIn1.sl'
'CrCy_10-sbox2-D5-sIn90.sl'
'CrCy_10-sbox2-D5-sIn92.sl'
'CrCy_10-sbox2-D5-sIn15.sl'
'CrCy_8-P12-D5-sIn3.sl'
'CrCy_10-sbox2-D5-sIn14.sl'
'CrCy_10-sbox2-D5-sIn80.sl'
'CrCy_10-sbox2-D5-sIn77.sl'
'CrCy_6-P10-D9-sIn3.sl'
'CrCy_6-P10-D5-sIn3.sl'
'CrCy_9-pprmAll-D5-sIn1.sl'
'CrCy_9-pprmAll-D9-sIn1.sl'
'CrCy_10-sbox2-D5-sIn76.sl'
'CrCy_6-P10-D9-sIn.sl'


In [13]:
print(CIRCUIT_GRAMMAR_SYGUS["CrCy_10-sbox2-D5-sIn79.sl"].user_message)

[GRAMMAR]
(synth-fun skel ((LN70 Bool) (LN73 Bool) (LN77 Bool) (LN236 Bool) (LN252 Bool) (LN253 Bool)) Bool ((Start Bool ((and depth1 depth1) (not depth1) (or depth1 depth1) (xor depth1 depth1))) (depth1 Bool ((and depth2 depth2) (not depth2) (or depth2 depth2) (xor depth2 depth2))) (depth2 Bool ((and depth3 depth3) (not depth3) (or depth3 depth3) (xor depth3 depth3) LN77)) (depth3 Bool ((and depth4 depth4) (not depth4) (or depth4 depth4) (xor depth4 depth4) LN70 LN73 LN236 LN252)) (depth4 Bool (LN253))))

[NATURAL LANGUAGE SPECIFICATION]


[EXAMPLES]
false, true, false, true, false, true -> false
true, true, true, false, false, true -> false
true, true, true, true, true, false -> true
false, false, false, true, true, false -> false
true, true, true, false, false, true -> true
true, true, false, false, false, true -> false
false, false, false, false, false, true -> false
true, true, false, true, true, true -> false
false, true, true, true, true, false -> true
false, false, false, true,

In [14]:
sample_gpt_solutions(CIRCUIT_GRAMMAR_SYGUS["CrCy_10-sbox2-D5-sIn79.sl"])

(['(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and (or LN70 LN73) (or LN77 LN236) LN252 LN253))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and LN70 (not LN73) LN77 (not LN236) LN252 (not LN253))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and LN70 (not LN73) LN77 LN236 (not LN252) LN253))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and LN253 (not LN236) (not LN77) LN73 (not LN70))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and LN70 (not LN73) LN77 LN236 (not LN252) LN253))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and LN70 (not LN73) LN77 (not LN236) LN252 (not LN253)))',
  '(define-fun skel (LN70 Bool LN73 Bool LN77 Bool LN236 Bool LN252 Bool LN253 Bool) Bool\n(and (or LN70 LN73) (not LN236)

In [15]:
CIRCUIT_OUTPUT = {}

for filename, problem in CIRCUIT_GRAMMAR_SYGUS.items():
    print(filename)
    solutions, time_diff_ms = sample_gpt_solutions(problem)
    CIRCUIT_OUTPUT[filename] = {
        "solutions": solutions,
        "time_ms": time_diff_ms
    }

CrCy_10-sbox2-D7-sIn1.sl
CrCy_6-P10-D7-sIn.sl
CrCy_6-P10-D7-sIn3.sl
CrCy_10-sbox2-D5-sIn104.sl
CrCy_9-pprmAll-D7-sIn1.sl
CrCy_6-P10-D5-sIn.sl
CrCy_10-sbox2-D5-sIn88.sl
CrCy_8-P12-D7-sIn5.sl
CrCy_10-sbox2-D5-sIn89.sl
CrCy_2-P6_2-P6.sl
CrCy_6-P10-D7-sIn5.sl
CrCy_8-P12-D7-sIn1.sl
CrCy_10-sbox2-D5-sIn79.sl
CrCy_10-sbox2-D5-sIn78.sl
CrCy_10-sbox2-D5-sIn91.sl
CrCy_6-P10-D9-sIn5.sl
CrCy_8-P12-D5-sIn1.sl
CrCy_8-P12-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn90.sl
CrCy_10-sbox2-D5-sIn92.sl
CrCy_10-sbox2-D5-sIn15.sl
CrCy_8-P12-D5-sIn3.sl
CrCy_10-sbox2-D5-sIn14.sl
CrCy_10-sbox2-D5-sIn80.sl
CrCy_10-sbox2-D5-sIn77.sl
CrCy_6-P10-D9-sIn3.sl
CrCy_6-P10-D5-sIn3.sl
CrCy_9-pprmAll-D5-sIn1.sl
CrCy_9-pprmAll-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn76.sl
CrCy_6-P10-D9-sIn.sl


In [16]:
CIRCUIT_OUTPUT_FILE = ROOT_DIRECTORY / "sygus/circuit-completions.json"
CIRCUIT_OUTPUT_FILE.write_text(json.dumps(CIRCUIT_OUTPUT, indent=2))

66544

## Example Generation

### Hackers Delight

In [17]:
HACKERS_DELIGHT_PROBLEMS = {}
HACKERS_DELIGHT_COMMENTS = {}

for file in HACKERS_DELIGHT_DIRECTORY.iterdir():
    print(file.name)
    with open(file, "r") as f:
        contents = file.read_text()
        HACKERS_DELIGHT_PROBLEMS[file.name] = sexp.loads(contents)
        HACKERS_DELIGHT_COMMENTS[file.name] = [
            line for line in contents.split("\n") if is_comment(line)
        ]


hd-25.sl
hd-24.sl
hd-09.sl
hd-26.sl
hd-27.sl
hd-08.sl
hd-23.sl
hd-22.sl
hd-18.sl
hd-20.sl
hd-21.sl
hd-19.sl
hd-07.sl
hd-10.sl
hd-11.sl
hd-06.sl
hd-04.sl
hd-13.sl
hd-12.sl
hd-05.sl
hd-16.sl
hd-01.sl
hd-17.sl
hd-15.sl
hd-02.sl
hd-03.sl
hd-14.sl


In [18]:
HACKERS_DELIGHT_SYNTH_FUNS = {}
HACKERS_DELIGHT_CONSTRAINTS = {}

for filename, sexps in HACKERS_DELIGHT_PROBLEMS.items():
    synth_fun = get_synth_fun(sexps)
    HACKERS_DELIGHT_SYNTH_FUNS[filename] = synth_fun
    HACKERS_DELIGHT_CONSTRAINTS[filename] = get_constraints(sexps)

pprint(HACKERS_DELIGHT_SYNTH_FUNS["hd-25.sl"])
pprint(HACKERS_DELIGHT_CONSTRAINTS["hd-25.sl"])

[Symbol('synth-fun'),
 Symbol('f'),
 [[Symbol('x'), [Symbol('BitVec'), 64]], [Symbol('y'), [Symbol('BitVec'), 64]]],
 [Symbol('BitVec'), 64],
 [[Symbol('Start'),
   [Symbol('BitVec'), 64],
   [[Symbol('bvnot'), Symbol('Start')],
    [Symbol('bvxor'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvand'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvor'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvneg'), Symbol('Start')],
    [Symbol('bvadd'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvmul'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvudiv'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvurem'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvlshr'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvashr'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvshl'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvsdiv'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvsrem'), Symbol('Start'), Symbol('Start')],
    [Symbol('bvsub'), Symbol('Start'), Sym

In [19]:
def is_function_definition(sexp: t.Any) -> bool:
    return isinstance(sexp[0], Symbol) and sexp[0].value() == "define-fun"

def get_function_definitions(sexps: t.List[t.Any]) -> t.List[t.Any]:
    return [sexp for sexp in sexps if is_function_definition(sexp)]

HACKERS_DELIGHT_FUNCTION_DEFINITIONS = {}

for filename, sexps in HACKERS_DELIGHT_PROBLEMS.items():
    HACKERS_DELIGHT_FUNCTION_DEFINITIONS[filename] = get_function_definitions(sexps)

pprint(HACKERS_DELIGHT_FUNCTION_DEFINITIONS["hd-25.sl"])

[[Symbol('define-fun'),
  Symbol('hd25'),
  [[Symbol('x'), [Symbol('BitVec'), 64]],
   [Symbol('y'), [Symbol('BitVec'), 64]]],
  [Symbol('BitVec'), 64],
  [Symbol('bvadd'),
   [Symbol('bvadd'),
    [Symbol('bvlshr'),
     [Symbol('bvadd'),
      [Symbol('bvmul'),
       [Symbol('bvand'), Symbol('x'), Symbol('#xffffffffffffffff')],
       [Symbol('bvlshr'), Symbol('y'), Symbol('#x0000000000000010')]],
      [Symbol('bvand'),
       [Symbol('bvadd'),
        [Symbol('bvmul'),
         [Symbol('bvlshr'), Symbol('x'), Symbol('#x0000000000000010')],
         [Symbol('bvand'), Symbol('y'), Symbol('#xffffffffffffffff')]],
        [Symbol('bvlshr'),
         [Symbol('bvmul'),
          [Symbol('bvand'), Symbol('x'), Symbol('#xffffffffffffffff')],
          [Symbol('bvand'), Symbol('y'), Symbol('#xffffffffffffffff')]],
         Symbol('#x0000000000000010')]],
       Symbol('#xffffffffffffffff')]],
     Symbol('#x0000000000000010')],
    [Symbol('bvlshr'),
     [Symbol('bvadd'),
      [Symbol('b

In [30]:
EXAMPLE_GENERATION_SYSTEM_PROMPT = """You are a coding assistant. Be precise and terse.
You will be given a SyGus grammar, a natural language specification, a set of function definitions and a set of constraints.
Your task is to use the given information to generate a 10 of input-output examples that satisfy the constraints."""

TOOL = {
        "type": "function",
        "function": {
            "name": "send_examples",
            "description": "send a list of input-output examples to the user",
            "parameters": {
                "type": "object",
                "properties": {
                    "examples": {
                        "type": "array",
                        "description": "a list of input-output examples",
                        "items": {
                            "type": "object",
                            "description": "a single input-output example",
                            "properties": {
                                "inputs": {
                                    "type": "array",
                                    "description": "a list of inputs",
                                    "items": {
                                        "type": "string",
                                        "description": "a single input"
                                    }
                                },
                                "output": {
                                    "type": "string",
                                    "description": "the output"
                                }
                            }
                        }
                    }
                }
            }
        }
    }

In [31]:
TEST_GRAMMAR = HACKERS_DELIGHT_SYNTH_FUNS["hd-25.sl"]
TEST_NAT_LANG_SPEC = "\n".join(HACKERS_DELIGHT_COMMENTS["hd-25.sl"])
TEST_FUNCTION_DEFINITIONS = "\n".join([sexp.dumps(fd) for fd in HACKERS_DELIGHT_FUNCTION_DEFINITIONS["hd-25.sl"]])
TEST_CONSTRAINTS = "\n".join([sexp.dumps(c) for c in  HACKERS_DELIGHT_CONSTRAINTS["hd-25.sl"]])

TEST_USER_MESSAGE = f"""[GRAMMAR]
{sexp.dumps(TEST_GRAMMAR)}

[NATURAL LANGUAGE SPECIFICATION]
{TEST_NAT_LANG_SPEC}

[FUNCTION DEFINITIONS]
{TEST_FUNCTION_DEFINITIONS}

[CONSTRAINTS]
{TEST_CONSTRAINTS}"""

In [32]:
pprint(TEST_USER_MESSAGE)

('[GRAMMAR]\n'
 '(synth-fun f ((x (BitVec 64)) (y (BitVec 64))) (BitVec 64) ((Start (BitVec '
 '64) ((bvnot Start) (bvxor Start Start) (bvand Start Start) (bvor Start '
 'Start) (bvneg Start) (bvadd Start Start) (bvmul Start Start) (bvudiv Start '
 'Start) (bvurem Start Start) (bvlshr Start Start) (bvashr Start Start) (bvshl '
 'Start Start) (bvsdiv Start Start) (bvsrem Start Start) (bvsub Start Start) x '
 'y \\#x0000000000000001 \\#x0000000000000010 \\#xffffffffffffffff (ite '
 'StartBool Start Start))) (StartBool Bool ((= Start Start)))))\n'
 '\n'
 '[NATURAL LANGUAGE SPECIFICATION]\n'
 '; Compute higher order half of product of x and y.\n'
 '\n'
 '[FUNCTION DEFINITIONS]\n'
 '(define-fun hd25 ((x (BitVec 64)) (y (BitVec 64))) (BitVec 64) (bvadd (bvadd '
 '(bvlshr (bvadd (bvmul (bvand x \\#xffffffffffffffff) (bvlshr y '
 '\\#x0000000000000010)) (bvand (bvadd (bvmul (bvlshr x \\#x0000000000000010) '
 '(bvand y \\#xffffffffffffffff)) (bvlshr (bvmul (bvand x '
 '\\#xffffffffffffffff) (bv

In [33]:
completion = OPENAI.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": EXAMPLE_GENERATION_SYSTEM_PROMPT},
        {"role": "user", "content": TEST_USER_MESSAGE},
    ],
    tools=[TOOL],
    tool_choice=TOOL,
    n=5
)

pprint(completion)

ChatCompletion(id='chatcmpl-8y8CY0zhks3RQhhD8cR07U5kHLGQX', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_4ZitbMW5y0Y75LFEOi34zYAM', function=Function(arguments='{\n  "examples": [\n    {\n      "inputs": ["\\#x0000000000000001","\\#x0000000000000001"],\n      "output": "\\#x0000000000000000"\n    },\n    {\n      "inputs": ["\\#x0000000000000002","\\#x0000000000000002"],\n      "output": "\\#x0000000000000000"\n    },\n    {\n      "inputs": ["\\#x0000000000000010","\\#x0000000000000010"],\n      "output": "\\#x0000000000000001"\n    },\n    {\n      "inputs": ["\\#x0000000000000001","\\#x0000000000000010"],\n      "output": "\\#x0000000000000000"\n    },\n    {\n      "inputs": ["\\#x0000000000000010","\\#x0000000000000001"],\n      "output": "\\#x0000000000000000"\n    },\n    {\n      "inputs": ["\\#x000000000000000F","\\#x000000000000

In [36]:
examples = []
for choice in completion.choices:
    try:
        arguments = json.loads(choice.message.tool_calls[0].function.arguments)
        examples.extend(arguments["examples"])
    except Exception as e:
        print(e)
        continue

pprint(examples)

Invalid \escape: line 4 column 19 (char 42)
Invalid \escape: line 4 column 19 (char 42)
[{'inputs': ['0', '0'], 'output': '0'},
 {'inputs': ['1', '1'], 'output': '0'},
 {'inputs': ['2', '2'], 'output': '0'},
 {'inputs': ['10', '10'], 'output': '0'},
 {'inputs': ['100', '100'], 'output': '0'},
 {'inputs': ['1000', '1000'], 'output': '0'},
 {'inputs': ['10000', '10000'], 'output': '0'},
 {'inputs': ['100000', '100000'], 'output': '0'},
 {'inputs': ['1000000', '1000000'], 'output': '0'},
 {'inputs': ['10000000', '10000000'], 'output': '0'},
 {'inputs': ['0x0000000000000000', '0x0000000000000000'],
  'output': '0x0000000000000000'},
 {'inputs': ['0xffffffffffffffff', '0x0000000000000001'],
  'output': '0x0000000000000000'},
 {'inputs': ['0x0000000000000001', '0xffffffffffffffff'],
  'output': '0x0000000000000000'},
 {'inputs': ['0xffffffffffffffff', '0xffffffffffffffff'],
  'output': '0xffffffffffffffff'},
 {'inputs': ['0x0000000000000002', '0x0000000000000002'],
  'output': '0x00000000000

In [37]:
def generate_examples(filename: str, n=5):
    synth_fun = HACKERS_DELIGHT_SYNTH_FUNS[filename]
    function_definitions = "\n".join([sexp.dumps(fd) for fd in HACKERS_DELIGHT_FUNCTION_DEFINITIONS[filename]])
    constraints = "\n".join([sexp.dumps(c) for c in  HACKERS_DELIGHT_CONSTRAINTS[filename]])
    natural_language_spec = "\n".join(HACKERS_DELIGHT_COMMENTS[filename])
    user_message = f"""[GRAMMAR]
{sexp.dumps(synth_fun)}

[NATURAL LANGUAGE SPECIFICATION]
{natural_language_spec}

[FUNCTION DEFINITIONS]
{function_definitions}

[CONSTRAINTS]
{constraints}"""
    completion = OPENAI.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": EXAMPLE_GENERATION_SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
        tools=[TOOL],
        tool_choice=TOOL,
        n=n
    )
    examples = []
    for choice in completion.choices:
        try:
            arguments = json.loads(choice.message.tool_calls[0].function.arguments)
            examples.extend(arguments["examples"])
        except Exception as e:
            print(e)
            continue
    return examples

In [38]:
import os
import pickle

if os.path.exists("./HACKERS_DELIGHT_GENERATED_EXAMPLES.pkl"):
    with open("./HACKERS_DELIGHT_GENERATED_EXAMPLES.pkl", "r") as f:
        HACKERS_DELIGHT_GENERATED_EXAMPLES = pickle.load(f)
else:
    HACKERS_DELIGHT_GENERATED_EXAMPLES = {}
    for filename in HACKERS_DELIGHT_SYNTH_FUNS.keys():
        print(filename)
        HACKERS_DELIGHT_GENERATED_EXAMPLES[filename] = generate_examples(filename, n=5)
    with open("./HACKERS_DELIGHT_GENERATED_EXAMPLES.pkl", "w") as f:
        pickle.dump(HACKERS_DELIGHT_GENERATED_EXAMPLES, f)

hd-25.sl
hd-24.sl
hd-09.sl
hd-26.sl
Invalid \escape: line 4 column 19 (char 42)
hd-27.sl
hd-08.sl
hd-23.sl
hd-22.sl
hd-18.sl
Invalid \escape: line 4 column 19 (char 42)
hd-20.sl
Invalid \escape: line 4 column 19 (char 42)
hd-21.sl
hd-19.sl
hd-07.sl
hd-10.sl
hd-11.sl
Invalid \escape: line 4 column 19 (char 42)
hd-06.sl
Invalid \escape: line 4 column 19 (char 42)
hd-04.sl
hd-13.sl
hd-12.sl
hd-05.sl
Invalid \escape: line 4 column 19 (char 42)
hd-16.sl
hd-01.sl
hd-17.sl
Invalid \escape: line 4 column 19 (char 42)
hd-15.sl
Invalid \escape: line 4 column 19 (char 42)
hd-02.sl
Invalid \escape: line 4 column 19 (char 42)
hd-03.sl
Invalid \escape: line 4 column 19 (char 42)
hd-14.sl


TypeError: write() argument must be str, not bytes

In [39]:
pprint(HACKERS_DELIGHT_GENERATED_EXAMPLES)

{'hd-01.sl': [{'inputs': ['#x0000000000000000'],
               'output': '#x0000000000000000'},
              {'inputs': ['#x0000000000000001'],
               'output': '#x0000000000000000'},
              {'inputs': ['#x0000000000000002'],
               'output': '#x0000000000000000'},
              {'inputs': ['#x0000000000000003'],
               'output': '#x0000000000000002'},
              {'inputs': ['#x0000000000000004'],
               'output': '#x0000000000000000'},
              {'inputs': ['#x0000000000000005'],
               'output': '#x0000000000000004'},
              {'inputs': ['#x0000000000000006'],
               'output': '#x0000000000000004'},
              {'inputs': ['#x0000000000000007'],
               'output': '#x0000000000000006'},
              {'inputs': ['#x0000000000000008'],
               'output': '#x0000000000000000'},
              {'inputs': ['#x0000000000000009'],
               'output': '#x0000000000000008'},
              {'inputs': ['0xF

In [40]:
# write to json
HACKERS_DELIGHT_GENERATED_EXAMPLES_FILE = ROOT_DIRECTORY / "sygus/hackers-delight-generated-examples.json"
HACKERS_DELIGHT_GENERATED_EXAMPLES_FILE.write_text(json.dumps(HACKERS_DELIGHT_GENERATED_EXAMPLES, indent=2))

126994

### Circuit

In [41]:
CIRCUIT_PROBLEMS = {}
CIRCUIT_COMMENTS = {}

for file in CIRCUIT_DIRECTORY.iterdir():
    print(file.name)
    with open(file, "r") as f:
        contents = file.read_text()
        CIRCUIT_PROBLEMS[file.name] = sexp.loads(contents)
        CIRCUIT_COMMENTS[file.name] = [
            line for line in contents.split("\n") if is_comment(line)
        ]

CrCy_10-sbox2-D7-sIn1.sl
CrCy_6-P10-D7-sIn.sl
CrCy_6-P10-D7-sIn3.sl
CrCy_10-sbox2-D5-sIn104.sl
CrCy_9-pprmAll-D7-sIn1.sl
CrCy_6-P10-D5-sIn.sl
CrCy_10-sbox2-D5-sIn88.sl
CrCy_8-P12-D7-sIn5.sl
CrCy_10-sbox2-D5-sIn89.sl
CrCy_2-P6_2-P6.sl
CrCy_6-P10-D7-sIn5.sl
CrCy_8-P12-D7-sIn1.sl
CrCy_10-sbox2-D5-sIn79.sl
CrCy_10-sbox2-D5-sIn78.sl
CrCy_10-sbox2-D5-sIn91.sl
CrCy_6-P10-D9-sIn5.sl
CrCy_8-P12-D5-sIn1.sl
CrCy_8-P12-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn90.sl
CrCy_10-sbox2-D5-sIn92.sl
CrCy_10-sbox2-D5-sIn15.sl
CrCy_8-P12-D5-sIn3.sl
CrCy_10-sbox2-D5-sIn14.sl
CrCy_10-sbox2-D5-sIn80.sl
CrCy_10-sbox2-D5-sIn77.sl
CrCy_6-P10-D9-sIn3.sl
CrCy_6-P10-D5-sIn3.sl
CrCy_9-pprmAll-D5-sIn1.sl
CrCy_9-pprmAll-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn76.sl
CrCy_6-P10-D9-sIn.sl


In [42]:
CIRCUIT_SYNTH_FUNS = {}
CIRCUIT_CONSTRAINTS = {}

for filename, sexps in CIRCUIT_PROBLEMS.items():
    synth_fun = get_synth_fun(sexps)
    CIRCUIT_SYNTH_FUNS[filename] = synth_fun
    CIRCUIT_CONSTRAINTS[filename] = get_constraints(sexps)

pprint(CIRCUIT_SYNTH_FUNS["CrCy_10-sbox2-D7-sIn1.sl"])
pprint(CIRCUIT_CONSTRAINTS["CrCy_10-sbox2-D7-sIn1.sl"])

[Symbol('synth-fun'),
 Symbol('skel'),
 [[Symbol('LN17'), Symbol('Bool')],
  [Symbol('k7'), Symbol('Bool')],
  [Symbol('LN34'), Symbol('Bool')],
  [Symbol('LN40'), Symbol('Bool')],
  [Symbol('LN47'), Symbol('Bool')],
  [Symbol('LN56'), Symbol('Bool')],
  [Symbol('LN65'), Symbol('Bool')],
  [Symbol('LN75'), Symbol('Bool')]],
 Symbol('Bool'),
 [[Symbol('Start'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth1'), Symbol('depth1')],
    [Symbol('not'), Symbol('depth1')],
    [Symbol('or'), Symbol('depth1'), Symbol('depth1')],
    [Symbol('xor'), Symbol('depth1'), Symbol('depth1')]]],
  [Symbol('depth1'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth2'), Symbol('depth2')],
    [Symbol('not'), Symbol('depth2')],
    [Symbol('or'), Symbol('depth2'), Symbol('depth2')],
    [Symbol('xor'), Symbol('depth2'), Symbol('depth2')],
    Symbol('LN75')]],
  [Symbol('depth2'),
   Symbol('Bool'),
   [[Symbol('and'), Symbol('depth3'), Symbol('depth3')],
    [Symbol('not'), Symbol('depth3')],


In [43]:
CURCUIT_FUNCTION_DEFINITIONS = {}

for filename, sexps in CIRCUIT_PROBLEMS.items():
    CURCUIT_FUNCTION_DEFINITIONS[filename] = get_function_definitions(sexps)

pprint(CURCUIT_FUNCTION_DEFINITIONS["CrCy_10-sbox2-D7-sIn1.sl"])

[[Symbol('define-fun'),
  Symbol('origCir'),
  [[Symbol('LN17'), Symbol('Bool')],
   [Symbol('k7'), Symbol('Bool')],
   [Symbol('LN34'), Symbol('Bool')],
   [Symbol('LN40'), Symbol('Bool')],
   [Symbol('LN47'), Symbol('Bool')],
   [Symbol('LN56'), Symbol('Bool')],
   [Symbol('LN65'), Symbol('Bool')],
   [Symbol('LN75'), Symbol('Bool')]],
  Symbol('Bool'),
  [Symbol('xor'),
   [Symbol('xor'),
    [Symbol('xor'),
     [Symbol('xor'),
      [Symbol('and'),
       Symbol('LN40'),
       [Symbol('xor'),
        [Symbol('xor'), Symbol('k7'), Symbol('LN17')],
        Symbol('LN34')]],
      Symbol('LN56')],
     Symbol('LN65')],
    Symbol('LN47')],
   Symbol('LN75')]]]


In [45]:
def generate_circuit_examples(filename: str, n=5):
    synth_fun = CIRCUIT_SYNTH_FUNS[filename]
    function_definitions = "\n".join([sexp.dumps(fd) for fd in CURCUIT_FUNCTION_DEFINITIONS[filename]])
    constraints = "\n".join([sexp.dumps(c) for c in  CIRCUIT_CONSTRAINTS[filename]])
    natural_language_spec = "\n".join(CIRCUIT_COMMENTS[filename])
    user_message = f"""[GRAMMAR]
{sexp.dumps(synth_fun)}

[NATURAL LANGUAGE SPECIFICATION]
{natural_language_spec}

[FUNCTION DEFINITIONS]
{function_definitions}

[CONSTRAINTS]
{constraints}"""
    completion = OPENAI.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": EXAMPLE_GENERATION_SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
        tools=[TOOL],
        tool_choice=TOOL,
        n=n
    )
    examples = []
    for choice in completion.choices:
        try:
            arguments = json.loads(choice.message.tool_calls[0].function.arguments)
            examples.extend(arguments["examples"])
        except Exception as e:
            print(e)
            continue
    return examples

if os.path.exists("./CIRCUIT_GENERATED_EXAMPLES.pkl"):
    with open("./CIRCUIT_GENERATED_EXAMPLES.pkl", "r") as f:
        CIRCUIT_GENERATED_EXAMPLES = pickle.load(f)
else:
    CIRCUIT_GENERATED_EXAMPLES = {}
    for filename in CIRCUIT_SYNTH_FUNS.keys():
        print(filename)
        CIRCUIT_GENERATED_EXAMPLES[filename] = generate_circuit_examples(filename, n=5)
    with open("./CIRCUIT_GENERATED_EXAMPLES.pkl", "w") as f:
        pickle.dump(CIRCUIT_GENERATED_EXAMPLES, f)

CrCy_10-sbox2-D7-sIn1.sl
CrCy_6-P10-D7-sIn.sl
CrCy_6-P10-D7-sIn3.sl
CrCy_10-sbox2-D5-sIn104.sl
CrCy_9-pprmAll-D7-sIn1.sl
CrCy_6-P10-D5-sIn.sl
CrCy_10-sbox2-D5-sIn88.sl
CrCy_8-P12-D7-sIn5.sl
CrCy_10-sbox2-D5-sIn89.sl
CrCy_2-P6_2-P6.sl
CrCy_6-P10-D7-sIn5.sl
CrCy_8-P12-D7-sIn1.sl
CrCy_10-sbox2-D5-sIn79.sl
CrCy_10-sbox2-D5-sIn78.sl
CrCy_10-sbox2-D5-sIn91.sl
CrCy_6-P10-D9-sIn5.sl
CrCy_8-P12-D5-sIn1.sl
CrCy_8-P12-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn90.sl
CrCy_10-sbox2-D5-sIn92.sl
CrCy_10-sbox2-D5-sIn15.sl
CrCy_8-P12-D5-sIn3.sl
CrCy_10-sbox2-D5-sIn14.sl
CrCy_10-sbox2-D5-sIn80.sl
CrCy_10-sbox2-D5-sIn77.sl
CrCy_6-P10-D9-sIn3.sl
CrCy_6-P10-D5-sIn3.sl
CrCy_9-pprmAll-D5-sIn1.sl
CrCy_9-pprmAll-D9-sIn1.sl
CrCy_10-sbox2-D5-sIn76.sl
CrCy_6-P10-D9-sIn.sl


TypeError: write() argument must be str, not bytes

In [46]:
pprint(CIRCUIT_GENERATED_EXAMPLES)

{'CrCy_10-sbox2-D5-sIn104.sl': [{'inputs': ['true',
                                            'true',
                                            'true',
                                            'true',
                                            'true',
                                            'true'],
                                 'output': 'false'},
                                {'inputs': ['true',
                                            'false',
                                            'true',
                                            'true',
                                            'true',
                                            'true'],
                                 'output': 'false'},
                                {'inputs': ['false',
                                            'true',
                                            'false',
                                            'false',
                                            'false',
   

In [47]:
# write to json
CIRCUIT_GENERATED_EXAMPLES_FILE = ROOT_DIRECTORY / "sygus/circuit-generated-examples.json"
CIRCUIT_GENERATED_EXAMPLES_FILE.write_text(json.dumps(CIRCUIT_GENERATED_EXAMPLES, indent=2))

372975