In [2]:
import os
import sys
from pathlib import Path

CURRENT_DIRECTORY = Path(os.getcwd())
ROOT_DIRECTORY = (CURRENT_DIRECTORY / "..").absolute().resolve()

print(f"Current directory: {CURRENT_DIRECTORY}")
print(f"Root directory: {ROOT_DIRECTORY}")

sys.path.append(str(ROOT_DIRECTORY))

Current directory: /home/ubuntu/arga-arc/sygus
Root directory: /home/ubuntu/arga-arc


In [3]:
import typing as t
from pprint import pprint
from dataclasses import dataclass
import sexpdata as sexp
from sexpdata import Symbol
from openai import OpenAI
from config import CONFIG
from datetime import datetime
import json
import random
import math

OPENAI = OpenAI(organization=CONFIG.OPENAI_ORGANIZATION, api_key=CONFIG.OPENAI_SECRET_KEY)
client = OpenAI(organization=CONFIG.OPENAI_ORGANIZATION, api_key=CONFIG.OPENAI_SECRET_KEY)

In [7]:
class CompletionJSON(t.TypedDict):
    solutions: t.List[str]
    time_ms: float

In [16]:
TEST_S_EXPRESSIONS = [
    sexp.loads("(define-fun f (name String) String\n(str.++ (str.substr name 0 4) \"(\" (str.substr name 4 3) \")\" (str.substr name 7 (- (str.len name) 7))))")
]

SExpItem = t.Union[
    sexp.Symbol,
    str,
    int
]
SExp = t.Union[SExpItem,t.List["SExp"]]

def walk(s: SExp):
    prune = yield s
    if prune is None:
        prune = False
    if prune:
        return
    
    if isinstance(s, list):
        for i in s:
            yield from walk(i)


for s in TEST_S_EXPRESSIONS:
    print("sexp")
    pprint(s)
    print("walk")
    for node in walk(s):
        pprint(node)

sexp
[[Symbol('define-fun'),
  Symbol('f'),
  [Symbol('name'), Symbol('String')],
  Symbol('String'),
  [Symbol('str.++'),
   [Symbol('str.substr'), Symbol('name'), 0, 4],
   '(',
   [Symbol('str.substr'), Symbol('name'), 4, 3],
   ')',
   [Symbol('str.substr'),
    Symbol('name'),
    7,
    [Symbol('-'), [Symbol('str.len'), Symbol('name')], 7]]]]]
walk
[[Symbol('define-fun'),
  Symbol('f'),
  [Symbol('name'), Symbol('String')],
  Symbol('String'),
  [Symbol('str.++'),
   [Symbol('str.substr'), Symbol('name'), 0, 4],
   '(',
   [Symbol('str.substr'), Symbol('name'), 4, 3],
   ')',
   [Symbol('str.substr'),
    Symbol('name'),
    7,
    [Symbol('-'), [Symbol('str.len'), Symbol('name')], 7]]]]]
[Symbol('define-fun'),
 Symbol('f'),
 [Symbol('name'), Symbol('String')],
 Symbol('String'),
 [Symbol('str.++'),
  [Symbol('str.substr'), Symbol('name'), 0, 4],
  '(',
  [Symbol('str.substr'), Symbol('name'), 4, 3],
  ')',
  [Symbol('str.substr'),
   Symbol('name'),
   7,
   [Symbol('-'), [Symbo

In [28]:
TEST_REPAIR_COMPLETIONS = [
    "(define-fun f (name String) String\n(str.substr name 4 3)",
    """(define-fun f (_arg_0 String) String
(str.replace _arg_0 "1" "")
(str.replace _arg_0 "2" "")
(str.replace _arg_0 "3" "")
(str.replace _arg_0 "4" "")
(str.replace _arg_0 "5" "")
(str.replace _arg_0 "6" "")
(str.replace _arg_0 "7" "")
(str.replace _arg_0 "8" "")
(str.replace _arg_0 "9" "")
(str.replace _arg_0 "0" "")))"""
]

def add_closing_bracket(completion: str) -> str:
    return completion + ")"

def remove_closing_bracket(completion: str) -> str:
    return completion[:-1]

def parse_and_repair(completion: str) -> SExp:
    try:
        parsed: SExp = sexp.loads(completion)
        return parsed
    except Exception as e:
        if "Not enough closing brackets." in  str(e):
            return parse_and_repair(add_closing_bracket(completion))
        if "Too many closing brackets." in str(e):
            return parse_and_repair(remove_closing_bracket(completion))
        else:
            raise e
        
for completion in TEST_REPAIR_COMPLETIONS:
    print("completion:")
    print(completion)
    print("repaired")
    pprint(parse_and_repair(completion))
    print()

completion:
(define-fun f (name String) String
(str.substr name 4 3)
repaired
[[Symbol('define-fun'),
  Symbol('f'),
  [Symbol('name'), Symbol('String')],
  Symbol('String'),
  [Symbol('str.substr'), Symbol('name'), 4, 3]]]

completion:
(define-fun f (_arg_0 String) String
(str.replace _arg_0 "1" "")
(str.replace _arg_0 "2" "")
(str.replace _arg_0 "3" "")
(str.replace _arg_0 "4" "")
(str.replace _arg_0 "5" "")
(str.replace _arg_0 "6" "")
(str.replace _arg_0 "7" "")
(str.replace _arg_0 "8" "")
(str.replace _arg_0 "9" "")
(str.replace _arg_0 "0" "")))
repaired
[[Symbol('define-fun'),
  Symbol('f'),
  [Symbol('_arg_0'), Symbol('String')],
  Symbol('String'),
  [Symbol('str.replace'), Symbol('_arg_0'), '1', ''],
  [Symbol('str.replace'), Symbol('_arg_0'), '2', ''],
  [Symbol('str.replace'), Symbol('_arg_0'), '3', ''],
  [Symbol('str.replace'), Symbol('_arg_0'), '4', ''],
  [Symbol('str.replace'), Symbol('_arg_0'), '5', ''],
  [Symbol('str.replace'), Symbol('_arg_0'), '6', ''],
  [Symbol('s

## string

In [41]:
COMPLETIONS_PATH = CURRENT_DIRECTORY / "string-grammar-completions.json"
COMPLETIONS: t.Dict[str, CompletionJSON] = json.loads(COMPLETIONS_PATH.read_text())

list(COMPLETIONS.keys())[:10]

['get-first-name-from-name-with-comma.sl',
 'strip-html-from-text-or-numbers.sl',
 'count-total-words-in-a-cell.sl',
 'stackoverflow8.sl',
 'phone-6-long.sl',
 'phone-10.sl',
 'phone-6.sl',
 'phone-7-short.sl',
 'phone-7.sl',
 'univ_6_short.sl']

In [42]:
TEST_COMPLETIONS = [
    "(define-fun f (name String) String\n(str.++ (str.substr name 0 4) \"(\" (str.substr name 4 3) \")\" (str.substr name 7 (- (str.len name) 7))))",
    "(define-fun f (name String) String\n(str.substr name 4 3)",
]


def get_constants(completion: str) -> t.List[str]:
    parsed = parse_and_repair(completion)
    return list(set([node for node in walk(parsed) if type(node) == str or type(node) == int]))

for completion in TEST_COMPLETIONS:
    print("completion:")
    print(completion)
    print("constants")
    print(get_constants(completion))
    print()


completion:
(define-fun f (name String) String
(str.++ (str.substr name 0 4) "(" (str.substr name 4 3) ")" (str.substr name 7 (- (str.len name) 7))))
constants
[0, 3, 4, 7, ')', '(']

completion:
(define-fun f (name String) String
(str.substr name 4 3)
constants
[3, 4]



In [43]:
for key, completions in COMPLETIONS.items():
    print(key)
    
    constants = []
    for completion in completions["solutions"]:
        try:
            constants.append(get_constants(completion))
        except Exception as e:
            print(e)
            print(completion)
            print()
            constants.append([])
            continue
    completions["constants"] = constants
    completions["all_constants"] = list(set(sum(constants, [])))

COMPLETIONS[list(COMPLETIONS.keys())[0]]

get-first-name-from-name-with-comma.sl
strip-html-from-text-or-numbers.sl
count-total-words-in-a-cell.sl
stackoverflow8.sl
phone-6-long.sl
phone-10.sl
phone-6.sl
phone-7-short.sl
phone-7.sl
univ_6_short.sl
stackoverflow9.sl
phone-9-long-repeat.sl
get-last-word.sl
phone-10_short.sl
17212077.sl
phone-5.sl
39060015.sl
phone-7-long-repeat.sl
38871714.sl
43120683.sl
stackoverflow11.sl
split-text-string-at-specific-character.sl
get-domain-name-from-url.sl
get-last-name-from-name-with-comma.sl
initials.sl
univ_5-long-repeat.sl
stackoverflow10.sl
phone-9-short.sl
phone-6-short.sl
43606446.sl
initials-long.sl
phone-5-long-repeat.sl
phone-7-long.sl
univ_6-long.sl
33619752.sl
extract-text-between-parentheses.sl
remove-leading-and-trailing-spaces-from-text.sl
strip-non-numeric-characters.sl
get-middle-name-from-full-name.sl
initials_small.sl
stackoverflow5.sl
extract-word-that-begins-with-specific-character.sl
35744094.sl
get-last-line-in-cell.sl
31753108.sl
exceljet4.sl
44789427.sl
stackoverflow4

{'solutions': ['(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.split "," _arg_0) (- _arg_1 1)))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((split (str.split _arg_0 ", ")))\n  (ite (= _arg_1 1) \n       (str.at split 1) \n       (str.at split 0))))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((s (str.split _arg_0 ",")))\n  (ite (= _arg_1 1) (str.trim (nth 1 s)) (str.trim (nth 0 s)))))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((split (str.substr _arg_0 0 (str.indexof _arg_0 "," 0))))\n  (ite (= _arg_1 2) split (str.substr _arg_0 (+ (str.len split) 1) (- (str.len _arg_0) (+ (str.len split) 1))))))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((s (str.split _arg_0 ", ")))\n  (if (= _arg_1 1) \n      (str.at s 1) \n      (str.at s 0))))',
  '(define-fun f (_arg_0 String _arg_1 Int) String\n(let ((names (str.split _arg_0 ",")))\n  (if (= _arg_1 1)\n      (str.trim (nth names 1))\n      (str.trim (nth names 0)))))'

In [44]:
OUTPUT_PATH = CURRENT_DIRECTORY / "string-grammar-completions-with-constants.json"
OUTPUT_PATH.write_text(json.dumps(COMPLETIONS, indent=2))

173290

## larger-string-grammar

In [33]:
COMPLETIONS_PATH = CURRENT_DIRECTORY / "larger-string-grammar-completions.json"
COMPLETIONS: t.Dict[str, CompletionJSON] = json.loads(COMPLETIONS_PATH.read_text())

list(COMPLETIONS.keys())[:10]

['exceljet2modified.sl',
 '31753108modified.sl',
 'find-nth-occurrence-of-charactermodified.sl',
 'clean-and-reformat-telephone-numbersmodified.sl',
 '17212077modified.sl',
 '11604909modified.sl',
 'stackoverflow4modified.sl',
 'remove-text-by-positionmodified.sl',
 '44789427modified.sl',
 'stackoverflow11modified.sl']

In [38]:
TEST_COMPLETIONS = [
    "(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 \".\" (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 \".\" (- (str.len _arg_0) 1)) 1))))",
    "(define-fun f (_arg_0 String _arg_1 Int) String\n(str.at (str.++ _arg_0 \" \") (+ (str.indexof (str.++ _arg_0 \" \") \" \" (- _arg_1 1)) 1)))",
    "(define-fun f (_arg_0 String) String\n(let ((first_space (str.indexof _arg_0 \" \" 0)) (second_space (str.indexof _arg_0 \" \" (+ (str.indexof _arg_0 \" \" 0) 1))))\n  (ite (= first_space -1) \"\"\n    (ite (= second_space -1) (str.substr _arg_0 (+ first_space 1) (- (str.len _arg_0) (+ first_space 1)))\n      (str.substr _arg_0 (+ first_space 1) (- second_space (+ first_space 1)))))))",
]

def get_constants(completion: str) -> t.List[str]:
    parsed = parse_and_repair(completion)
    return list(set([node for node in walk(parsed) if type(node) == str or type(node) == int]))

for completion in TEST_COMPLETIONS:
    print("completion:")
    print(completion)
    print("constants")
    print(get_constants(completion))
    print()

completion:
(define-fun f (_arg_0 String) String
(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))
constants
[1, '.']

completion:
(define-fun f (_arg_0 String _arg_1 Int) String
(str.at (str.++ _arg_0 " ") (+ (str.indexof (str.++ _arg_0 " ") " " (- _arg_1 1)) 1)))
constants
[1, ' ']

completion:
(define-fun f (_arg_0 String) String
(let ((first_space (str.indexof _arg_0 " " 0)) (second_space (str.indexof _arg_0 " " (+ (str.indexof _arg_0 " " 0) 1))))
  (ite (= first_space -1) ""
    (ite (= second_space -1) (str.substr _arg_0 (+ first_space 1) (- (str.len _arg_0) (+ first_space 1)))
      (str.substr _arg_0 (+ first_space 1) (- second_space (+ first_space 1)))))))
constants
[0, 1, '', ' ', -1]



In [39]:
for key, completions in COMPLETIONS.items():
    print(key)
    
    constants = []
    for completion in completions["solutions"]:
        try:
            constants.append(get_constants(completion))
        except Exception as e:
            print(e)
            print(completion)
            print()
            constants.append([])
            continue
    completions["constants"] = constants
    completions["all_constants"] = list(set(sum(constants, [])))

COMPLETIONS[list(COMPLETIONS.keys())[0]]

exceljet2modified.sl
31753108modified.sl
find-nth-occurrence-of-charactermodified.sl
clean-and-reformat-telephone-numbersmodified.sl
17212077modified.sl
11604909modified.sl
stackoverflow4modified.sl
remove-text-by-positionmodified.sl
44789427modified.sl
stackoverflow11modified.sl
stackoverflow3modified.sl
initials-longmodified.sl
phone-7modified.sl
43606446modified.sl
extract-nth-word-from-text-stringmodified.sl
remove-leading-and-trailing-spaces-from-textmodified.sl
phone-5-long-repeatmodified.sl
count-total-words-in-a-cellmodified.sl
stackoverflow8modified.sl
strip-numeric-characters-from-cellmodified.sl
phone-9modified.sl
initialsmodified.sl
phone-10-long-repeatmodified.sl
39060015modified.sl
phone-7-long-repeatmodified.sl
stackoverflow6modified.sl
strip-non-numeric-charactersmodified.sl
stackoverflow1modified.sl
get-last-wordmodified.sl
phone-5modified.sl
phone-6-long-repeatmodified.sl
get-last-line-in-cellmodified.sl
phone-9-shortmodified.sl
38871714modified.sl
phone-5-shortmodifi

{'solutions': ['(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))) (- (str.len _arg_0) (+ 1 (str.indexof _arg_0 "." (- (str.len _arg_0) 1))))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg_0 String) String\n(str.substr _arg_0 (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1) (- (str.len _arg_0) (+ (str.indexof _arg_0 "." (- (str.len _arg_0) 1)) 1))))',
  '(define-fun f (_arg

In [40]:
OUTPUT_PATH = CURRENT_DIRECTORY / "larger-string-grammar-completions-with-constants.json"
OUTPUT_PATH.write_text(json.dumps(COMPLETIONS, indent=2))

163291

## hacker's delight

In [45]:
COMPLETIONS_PATH = CURRENT_DIRECTORY / "hackers-delight-completions.json"
COMPLETIONS: t.Dict[str, CompletionJSON] = json.loads(COMPLETIONS_PATH.read_text())

list(COMPLETIONS.keys())[:10]

['hd-25.sl',
 'hd-24.sl',
 'hd-09.sl',
 'hd-26.sl',
 'hd-27.sl',
 'hd-08.sl',
 'hd-23.sl',
 'hd-22.sl',
 'hd-18.sl',
 'hd-20.sl']

In [50]:
TEST_COMPLETIONS = [
    "(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))",
    "(define-fun f (x (BitVec 64) m (BitVec 64) k (BitVec 64)) (BitVec 64)\n(bvor (bvor (bvand (bvshl (bvand x m) k) m) (bvand (bvlshr (bvand x m) k) m)) (bvand x (bvnot m))))",
    "(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvudiv (bvadd x y) \\#x0000000000000002))"
]

def get_constants(completion: str) -> t.List[str]:
    parsed = parse_and_repair(completion)
    ans = list(set([
        node for node in walk(parsed) 
        if type(node) != list and 
        not (
            type(node) == sexp.Symbol and
            not node.value().startswith("#")
        )]))
    return [
        node.value() if type(node) == sexp.Symbol else node
        for node in ans
    ]

for completion in TEST_COMPLETIONS:
    print("completion:")
    print(completion)
    print("constants")
    print(get_constants(completion))
    print()

completion:
(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)
(bvlshr (bvmul x y) \#x0000000000000020))
constants
[64, '#x0000000000000020']

completion:
(define-fun f (x (BitVec 64) m (BitVec 64) k (BitVec 64)) (BitVec 64)
(bvor (bvor (bvand (bvshl (bvand x m) k) m) (bvand (bvlshr (bvand x m) k) m)) (bvand x (bvnot m))))
constants
[64]

completion:
(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)
(bvudiv (bvadd x y) \#x0000000000000002))
constants
[64, '#x0000000000000002']



In [51]:
for key, completions in COMPLETIONS.items():
    print(key)
    
    constants = []
    for completion in completions["solutions"]:
        try:
            constants.append(get_constants(completion))
        except Exception as e:
            print(e)
            print(completion)
            print()
            constants.append([])
            continue
    completions["constants"] = constants
    completions["all_constants"] = list(set(sum(constants, [])))

COMPLETIONS[list(COMPLETIONS.keys())[0]]

hd-25.sl
hd-24.sl
hd-09.sl
hd-26.sl
hd-27.sl
hd-08.sl
hd-23.sl
hd-22.sl
hd-18.sl
hd-20.sl
hd-21.sl
hd-19.sl
hd-07.sl
hd-10.sl
hd-11.sl
hd-06.sl
hd-04.sl
hd-13.sl
hd-12.sl
hd-05.sl
hd-16.sl
hd-01.sl
hd-17.sl
hd-15.sl
hd-02.sl
hd-03.sl
hd-14.sl


{'solutions': ['(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) #x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) #x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) #x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (BitVec 64)\n(bvlshr (bvmul x y) \\#x0000000000000020))',
  '(define-fun f (x (BitVec 64) y (BitVec 64)) (

In [53]:
OUTPUT_PATH = CURRENT_DIRECTORY / "hackers-delight-completions-with-constants.json"
OUTPUT_PATH.write_text(json.dumps(COMPLETIONS, indent=2))

57647