# SyGuS completions

In [1]:
import os
import sys
from pathlib import Path

# The notebook is expected to run from a sub-directory of the repository;
# the repository root is taken to be the parent of the working directory.
CURRENT_DIRECTORY = Path.cwd()
# resolve() already yields an absolute, normalised path, so no separate absolute() call is needed.
ROOT_DIRECTORY = (CURRENT_DIRECTORY / "..").resolve()

print(f"Current directory: {CURRENT_DIRECTORY}")
print(f"Root directory: {ROOT_DIRECTORY}")

# Make the repository root importable. The membership check keeps the cell
# idempotent: re-running it no longer adds duplicate entries to sys.path.
if str(ROOT_DIRECTORY) not in sys.path:
    sys.path.append(str(ROOT_DIRECTORY))

Current directory: /home/ubuntu/arga-arc/sygus
Root directory: /home/ubuntu/arga-arc


In [2]:
from sygus.utils import sample_gpt_solutions, SygusProblem, SygusBenchmark, cleanup_completion, add_sygus_prefix

Current directory: /home/ubuntu/arga-arc/sygus
Root directory: /home/ubuntu/arga-arc


In [3]:
import typing as t
from pprint import pprint
from dataclasses import dataclass
import sexpdata as sexp
from sexpdata import Symbol
import json
import random
import math
import re

In [4]:
# Locations of the benchmark suites, all rooted under the repository's
# sygus/Probe test-benchmark directory.
BENCHMARKS_DIRECTORY = ROOT_DIRECTORY.joinpath("sygus", "Probe", "src", "test", "benchmarks")
CIRCUIT_DIRECTORY = BENCHMARKS_DIRECTORY.joinpath("circuit", "test")
HACKERS_DELIGHT_DIRECTORY = BENCHMARKS_DIRECTORY.joinpath("hackers-delight")
LARGER_STRING_GRAMMAR_DIRECTORY = BENCHMARKS_DIRECTORY.joinpath("larger-grammar")
STRING_GRAMMAR_DIRECTORY = BENCHMARKS_DIRECTORY.joinpath("string")

## Testing completion cleanup and parsing

In [None]:
# Raw completion strings (code fences, stray parentheses, backticks, full
# define-fun forms) used to eyeball what cleanup_completion returns.
EXAMPLES = [
    '```python\n(define-fun f ((_arg_0 String)) String (str.substr _arg_0 (+ (str.indexof _arg_0 ".") 1) (str.len _arg_0)))\n```',
    '\n  (str.substr _arg_0 (+ (str.indexof _arg_0 ".") 1) (str.len _arg_0))\n)',
    '`(str.substr _arg_0 (+ (str.indexof _arg_0 ".") 1) (str.len _arg_0))`',
    '(define-fun f ((_arg_0 String)) String (str.++ (str.++ \"R/\" (int.to.str (str.to.int (str.substr _arg_0 5 3)))) \" \" (int.to.str (str.to.int (str.substr _arg_0 9 3))) \" \" (int.to.str (str.to.int (str.substr _arg_0 13 3)))))',
    ") (str.replace _arg_0 \",\" \"\") \",\")",
    "```python\n(str.substr name 6 3)\n```"
]

# Print each raw completion next to its cleaned form, separated by a blank line.
for raw_completion in EXAMPLES:
    print("original", raw_completion, "cleaned", sep="\n")
    print(cleanup_completion(raw_completion), end="\n\n")

In [None]:
# (completion, prefix) pairs used to eyeball what add_sygus_prefix returns.
EXAMPLES = [
    ('(define-fun f ((_arg_0 String)) String (str.++ (str.++ \"R/\" (int.to.str (str.to.int (str.substr _arg_0 5 3)))) \" \" (int.to.str (str.to.int (str.substr _arg_0 9 3))) \" \" (int.to.str (str.to.int (str.substr _arg_0 13 3)))))', '(define-fun f (_arg_0 String) String'),
    ("(str.replace _arg_0 \",\" \"\") \",\")", '(define-fun f (_arg_0 String) String')
]

# Print each completion next to its prefixed form, separated by a blank line.
for raw_completion, signature_prefix in EXAMPLES:
    print("original", raw_completion, "with prefix", sep="\n")
    print(add_sygus_prefix(raw_completion, signature_prefix), end="\n\n")


## Generating Completions

### Larger String Grammar

In [5]:
# JSON file used to load and store the larger-string-grammar completions.
LARGER_STRING_GRAMMAR_OUTPUT_FILE = ROOT_DIRECTORY.joinpath("sygus", "larger-string-grammar-completions.deepseek.json")

In [6]:
# Load the benchmark from the output file when it already exists;
# otherwise build a new one from the benchmark directory.
LARGER_STRING_BENCHMARK = (
    SygusBenchmark.read_from_file(LARGER_STRING_GRAMMAR_OUTPUT_FILE, LARGER_STRING_GRAMMAR_DIRECTORY)
    if LARGER_STRING_GRAMMAR_OUTPUT_FILE.exists()
    else SygusBenchmark("larger-string", LARGER_STRING_GRAMMAR_DIRECTORY)
)

In [7]:
# Show the `user_message` text of one larger-string problem as a sanity check.
print(LARGER_STRING_BENCHMARK.sygus["phone-5-long-repeatmodified.sl"].user_message)

[GRAMMAR]
(synth-fun f ((name String)) String ((Start String (ntString)) (ntString String (name " " "+" "-" "." (str.++ ntString ntString) (str.replace ntString ntString ntString) "" " " "BRD" "DRS" "LDS" "Branding" "Direct Response" "Leads" "=" "/" "in" "_" "9" "." "microsoft" "windows" "apple" "mac" "-" "1" "2" "3" "4" "5" "6" "7" "8" "0" "," "<" ">" "/n" "%" "b" "apple" "bananas" "strawberries" "oranges" "LLC" "Inc" "Corporation" "Enterprises" "Company" "(" ")" "+" "name" "," (int.to.str ntInt) (ite ntBool ntString ntString) (str.substr ntString ntInt ntInt))) (ntInt Int (0 1 2 3 4 5 (+ ntInt ntInt) (- ntInt ntInt) (str.len ntString) -1 1 2 3 4 5 6 7 8 9 0 (str.indexof ntString ntString ntInt))) (ntBool Bool (true false (= ntInt ntInt) (str.prefixof ntString ntString) (str.suffixof ntString ntString) (str.contains ntString ntString)))))

[EXAMPLES]
+130 517-953-149 -> 130
+188 280-087-526 -> 188
+10 538-347-401 -> 10
+95 620-820-945 -> 95
+5 528-317-854 -> 5


[SOLUTION]
(define-fun

In [8]:
# Show the `user_message` text of a second larger-string problem as a sanity check.
print(LARGER_STRING_BENCHMARK.sygus["exceljet2modified.sl"].user_message)

[GRAMMAR]
(synth-fun f ((_arg_0 String)) String ((Start String (ntString)) (ntString String (_arg_0 "" " " "BRD" "DRS" "LDS" "Branding" "Direct Response" "Leads" "=" "/" "in" "_" "9" "." "microsoft" "windows" "apple" "mac" "-" "1" "2" "3" "4" "5" "6" "7" "8" "0" "," "<" ">" "/n" "%" "b" "apple" "bananas" "strawberries" "oranges" "LLC" "Inc" "Corporation" "Enterprises" "Company" "(" ")" "+" "name" "," (str.++ ntString ntString) (str.replace ntString ntString ntString) (str.at ntString ntInt) (int.to.str ntInt) (ite ntBool ntString ntString) (str.substr ntString ntInt ntInt))) (ntInt Int (-1 1 2 3 4 5 6 7 8 9 0 1 0 -1 (+ ntInt ntInt) (- ntInt ntInt) (str.len ntString) (str.to.int ntString) (ite ntBool ntInt ntInt) (str.indexof ntString ntString ntInt))) (ntBool Bool (true false (= ntInt ntInt) (str.prefixof ntString ntString) (str.suffixof ntString ntString) (str.contains ntString ntString)))))

[NATURAL LANGUAGE SPECIFICATION]
; https=//exceljet.net/formula/get-top-level-domain-tld

[EX

In [8]:
# Walk every loaded larger-string problem and pretty-print its file name,
# its signature and its examples.
for problem_name, sygus_problem in LARGER_STRING_BENCHMARK.sygus.items():
    pprint(problem_name)
    pprint(sygus_problem.signature)
    pprint(sygus_problem.examples)

'exceljet2modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['www.domain.com'], 'com'),
 (['mail.net'], 'net'),
 (['www.amaon.co.uk'], 'uk')]
'31753108modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['Tire Pressure ABC123873 Monitor'], 'ABC123873'),
 ([' Oil Life ABC849999999021 gauge'], 'ABC849999999021'),
 ([' Air conditioner GHF211 maintenance'], 'GHF211')]
'find-nth-occurrence-of-charactermodified.sl'
('f', [('_arg_0', 'String'), ('_arg_1', 'Int')], 'Int')
[(['replies to _aya, _tasisuke, and _chan', '1'], 12),
 (['replies to _aya, _tasisuke, and _chan', '2'], 18),
 (['replies to _aya, _tasisuke, and _chan', '3'], 33)]
'clean-and-reformat-telephone-numbersmodified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['801-456-8765'], '8014568765'),
 (['<978> 654-0299'], '9786540299'),
 (['978.654.0299'], '9786540299')]
'17212077modified.sl'
('f', [('_arg_0', 'String')], 'String')
[(['01/15/2013'], '01/2013'),
 (['03/07/2011'], '03/2011'),
 (['05/09/2009'], '05/2009')]
'11604909

In [9]:
# Sample completions for the larger-string benchmark from the DeepSeek Coder 33B Instruct model.
# `n=20` is presumably the number of completions requested per problem — TODO confirm.
# `output_file` is passed through to sample_solutions, presumably so results are saved while sampling — TODO confirm.
LARGER_STRING_BENCHMARK.sample_solutions(model="deepseek-ai/deepseek-coder-33b-instruct", n=20, output_file=LARGER_STRING_GRAMMAR_OUTPUT_FILE)

Sampling completions for 31753108modified.sl
Error generating completions for 31753108modified.sl: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>api.together.xyz | 524: A timeout occurred</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrapper">
    <div id="cf-error-details" class="p-0">
        <header class="mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8">
           

In [None]:
# Write the larger-string benchmark to its output file (the same file the load cell reads from).
LARGER_STRING_BENCHMARK.write(LARGER_STRING_GRAMMAR_OUTPUT_FILE)

### String Grammar

In [None]:
# JSON file used to load and store the string-grammar completions.
STRING_BENCHMARK_OUTPUT_FILE = ROOT_DIRECTORY.joinpath("sygus", "string-grammar-completions.gpt-3.5-turbo.json")

In [None]:
if STRING_BENCHMARK_OUTPUT_FILE.exists():
    # Load the benchmark from the existing output file.
    STRING_BENCHMARK = SygusBenchmark.read_from_file(STRING_BENCHMARK_OUTPUT_FILE, STRING_GRAMMAR_DIRECTORY)
else:
    # The larger-string cell constructs SygusBenchmark with a name as the first
    # positional argument followed by the directory; use the same call shape here
    # so the directory is not passed in the name position.
    # NOTE(review): the name "string" is chosen by analogy with "larger-string" — confirm against sygus.utils.
    STRING_BENCHMARK = SygusBenchmark(
        "string",
        STRING_GRAMMAR_DIRECTORY
    )

In [None]:
# Show the `user_message` text of one string-grammar problem as a sanity check.
print(STRING_BENCHMARK.sygus["get-first-name-from-name-with-comma.sl"].user_message)

In [None]:
# Walk every loaded string-grammar problem and pretty-print its file name,
# signature, examples, and how many completions are stored for it.
for problem_name, sygus_problem in STRING_BENCHMARK.sygus.items():
    pprint(problem_name)
    pprint(sygus_problem.signature)
    pprint(sygus_problem.examples)
    # Problems with no entry in `output` fall back to an empty completion list (count 0).
    saved_entry = STRING_BENCHMARK.output.get(problem_name, {"completions": []})
    pprint(len(saved_entry["completions"]))

In [None]:
# Sample completions for the string-grammar benchmark with gpt-3.5-turbo.
# NOTE(review): the larger-string cell also passes `output_file=...` to sample_solutions;
# confirm whether that argument is optional or should be supplied here as well.
STRING_BENCHMARK.sample_solutions(model="gpt-3.5-turbo", n=20)

In [None]:

# Write the string-grammar benchmark to its output file (the same file the load cell reads from).
STRING_BENCHMARK.write(STRING_BENCHMARK_OUTPUT_FILE)

### Circuit

In [None]:
# JSON file used to load and store the circuit completions.
CIRCUIT_OUTPUT_FILE = ROOT_DIRECTORY.joinpath("sygus", "circuit-completions.gpt-3.5-turbo.json")

In [None]:
# Input/output examples for the circuit problems, keyed by benchmark file name.
CIRCUIT_EXAMPLES_FILE = ROOT_DIRECTORY / "sygus/io-results-circuit.json"
CIRCUIT_EXAMPLES_JSON = json.loads(CIRCUIT_EXAMPLES_FILE.read_text())
pprint(CIRCUIT_EXAMPLES_JSON["CrCy_10-sbox2-D5-sIn79.sl"])

# Upper bound on the number of examples kept per problem.
MAX_CIRCUIT_EXAMPLES = 10
# Dedicated fixed-seed generator: Restart & Run All now selects the same
# examples on every run, and the global `random` state is left untouched.
CIRCUIT_SAMPLING_RNG = random.Random(0)

CIRCUIT_EXAMPLES = {}

for filename, examples in CIRCUIT_EXAMPLES_JSON.items():
    example_tuples = [(example["inputs"], example["output"]) for example in examples]
    # Problems at or under the cap keep all of their examples in the original order.
    if len(example_tuples) > MAX_CIRCUIT_EXAMPLES:
        example_tuples = CIRCUIT_SAMPLING_RNG.sample(example_tuples, MAX_CIRCUIT_EXAMPLES)
    CIRCUIT_EXAMPLES[filename] = example_tuples

pprint(CIRCUIT_EXAMPLES["CrCy_10-sbox2-D5-sIn79.sl"])

In [None]:
if CIRCUIT_OUTPUT_FILE.exists():
    # Load the benchmark from the existing output file.
    CIRCUIT_BENCHMARK = SygusBenchmark.read_from_file(CIRCUIT_OUTPUT_FILE, CIRCUIT_DIRECTORY, CIRCUIT_EXAMPLES)
else:
    # The larger-string cell constructs SygusBenchmark with a name as the first
    # positional argument followed by the directory; use the same call shape here
    # so the directory is not passed in the name position.
    # NOTE(review): the name "circuit" is chosen by analogy with "larger-string" — confirm against sygus.utils.
    CIRCUIT_BENCHMARK = SygusBenchmark(
        "circuit",
        CIRCUIT_DIRECTORY, CIRCUIT_EXAMPLES
    )

In [None]:
# Show the `user_message` text of one circuit problem as a sanity check.
print(CIRCUIT_BENCHMARK.sygus["CrCy_10-sbox2-D5-sIn79.sl"].user_message)

In [None]:
# Walk every loaded circuit problem and pretty-print its file name,
# its signature and its examples.
for problem_name, sygus_problem in CIRCUIT_BENCHMARK.sygus.items():
    pprint(problem_name)
    pprint(sygus_problem.signature)
    pprint(sygus_problem.examples)

In [None]:
# Sample completions for the circuit benchmark with gpt-3.5-turbo.
# NOTE(review): the larger-string cell also passes `output_file=...` to sample_solutions;
# confirm whether that argument is optional or should be supplied here as well.
CIRCUIT_BENCHMARK.sample_solutions(model="gpt-3.5-turbo", n=20)

In [None]:
# Write the circuit benchmark to its output file (the same file the load cell reads from).
CIRCUIT_BENCHMARK.write(CIRCUIT_OUTPUT_FILE)

### Hacker's Delight

In [None]:
# JSON file used to load and store the Hacker's Delight completions.
HACKERS_DELIGHT_OUTPUT_FILE = ROOT_DIRECTORY.joinpath("sygus", "hackers-delight-completions.gpt-3.5-turbo.json")

In [None]:
# Input/output examples for the Hacker's Delight problems, keyed by benchmark file name.
HACKERS_DELIGHT_EXAMPLE_FILE = ROOT_DIRECTORY / "sygus/io-results-bitvec.json"
HACKERS_DELIGHT_EXAMPLES_JSON = json.loads(HACKERS_DELIGHT_EXAMPLE_FILE.read_text())
pprint(HACKERS_DELIGHT_EXAMPLES_JSON["hd-08.sl"])

# Upper bound on the number of examples kept per problem.
MAX_HACKERS_DELIGHT_EXAMPLES = 10
# Dedicated fixed-seed generator: Restart & Run All now selects the same
# examples on every run, and the global `random` state is left untouched.
HACKERS_DELIGHT_SAMPLING_RNG = random.Random(0)

HACKERS_DELIGHT_EXAMPLES = {}

for filename, examples in HACKERS_DELIGHT_EXAMPLES_JSON.items():
    example_tuples = [
        (example["inputs"], example["output"]) for example in examples
    ]
    # Problems at or under the cap keep all of their examples in the original order.
    if len(example_tuples) > MAX_HACKERS_DELIGHT_EXAMPLES:
        example_tuples = HACKERS_DELIGHT_SAMPLING_RNG.sample(example_tuples, MAX_HACKERS_DELIGHT_EXAMPLES)
    HACKERS_DELIGHT_EXAMPLES[filename] = example_tuples

pprint(HACKERS_DELIGHT_EXAMPLES["hd-08.sl"])

In [None]:
if HACKERS_DELIGHT_OUTPUT_FILE.exists():
    # Load the benchmark from the existing output file.
    HACKERS_DELIGHT_BENCHMARK = SygusBenchmark.read_from_file(HACKERS_DELIGHT_OUTPUT_FILE, HACKERS_DELIGHT_DIRECTORY, HACKERS_DELIGHT_EXAMPLES)
else:
    # The larger-string cell constructs SygusBenchmark with a name as the first
    # positional argument followed by the directory; use the same call shape here
    # so the directory is not passed in the name position.
    # NOTE(review): the name "hackers-delight" is chosen by analogy with "larger-string" — confirm against sygus.utils.
    HACKERS_DELIGHT_BENCHMARK = SygusBenchmark(
        "hackers-delight",
        HACKERS_DELIGHT_DIRECTORY, HACKERS_DELIGHT_EXAMPLES
    )

In [None]:
# Show the `user_message` text of one Hacker's Delight problem as a sanity check.
print(HACKERS_DELIGHT_BENCHMARK.sygus["hd-08.sl"].user_message)

In [None]:
# Walk every loaded Hacker's Delight problem and pretty-print its file name,
# its signature and its examples.
for problem_name, sygus_problem in HACKERS_DELIGHT_BENCHMARK.sygus.items():
    pprint(problem_name)
    pprint(sygus_problem.signature)
    pprint(sygus_problem.examples)

In [None]:
# Sample completions for the Hacker's Delight benchmark with gpt-3.5-turbo.
# NOTE(review): the larger-string cell also passes `output_file=...` to sample_solutions;
# confirm whether that argument is optional or should be supplied here as well.
HACKERS_DELIGHT_BENCHMARK.sample_solutions(model="gpt-3.5-turbo", n=20)

In [None]:
# Write the Hacker's Delight benchmark to its output file (the same file the load cell reads from).
HACKERS_DELIGHT_BENCHMARK.write(HACKERS_DELIGHT_OUTPUT_FILE)