# Exploring the SMCalFlow Syntax

Exploring syntax and supported methods

In [1]:
from dataflow.core.lispress import *
from dataflow.core.linearize import *
from typeguard import check_type
import json
from pprint import pprint
from onmt.bin.translate import translate
import subprocess

### `parse_lispress` converts a string into a Lispress object, which is a nested list-of-lists with `str`s as leaves.

In [2]:
# Example utterance and a hand-written Lispress program string for it.
utterance: str = "what is my appointment with janice kang"
lisp_str: str = """(Yield
  (Event.id
    (singleton
      (QueryEventResponse.results
        (FindEventWrapperWithDefaults
          (Event.attendees_?
            (AttendeeListHasRecipientConstraint
              (RecipientWithNameLike
                (^(Recipient) EmptyStructConstraint)
                (PersonName.apply "janice kang")))))))))
"""
# Parse the string form into a Lispress object (nested lists with `str` leaves).
lispress = parse_lispress(lisp_str)
# Show the `Lispress` type alias itself, so the reader sees what is being checked below.
print(Lispress)
# Runtime check that the parsed value conforms to the `Lispress` type.
# NOTE(review): the (argname, value, expected_type) argument order looks like the
# typeguard 2.x API — confirm against the installed typeguard version.
check_type('lispress', lispress, Lispress)
# Last expression of the cell: display the parsed structure.
lispress

typing.Union[str, typing.List[ForwardRef('Sexp')]]


['Yield',
 ['Event.id',
  ['singleton',
   ['QueryEventResponse.results',
    ['FindEventWrapperWithDefaults',
     ['Event.attendees_?',
      ['AttendeeListHasRecipientConstraint',
       ['RecipientWithNameLike',
        [['^', ['Recipient'], 'EmptyStructConstraint']],
        ['PersonName.apply', '"janice kang"']]]]]]]]]

### `render_pretty` and `render_compact` do the reverse: they render a Lispress object back into an appropriately formatted string

In [3]:
# Single-line rendering first, followed by a blank line as a separator.
print(render_compact(lispress), "\n")

# Multi-line rendering; named once so the round-trip check below reuses it.
pretty_text = render_pretty(lispress)
print(pretty_text)

# Round trip: pretty-printing the parsed object must reproduce the input string
# (ignoring the trailing newline of the triple-quoted literal).
assert pretty_text == lisp_str.strip(), pretty_text.strip()

(Yield (Event.id (singleton (QueryEventResponse.results (FindEventWrapperWithDefaults (Event.attendees_? (AttendeeListHasRecipientConstraint (RecipientWithNameLike (^(Recipient) EmptyStructConstraint) (PersonName.apply "janice kang"))))))))) 

(Yield
  (Event.id
    (singleton
      (QueryEventResponse.results
        (FindEventWrapperWithDefaults
          (Event.attendees_?
            (AttendeeListHasRecipientConstraint
              (RecipientWithNameLike
                (^(Recipient) EmptyStructConstraint)
                (PersonName.apply "janice kang")))))))))


### `lispress_to_program` and `program_to_lispress` convert to and from a `Program` object, "which is closer to a computation DAG (rather than an abstract syntax tree), and is sometimes more convenient to work with".

In [4]:
# Convert the Lispress object into a Program, numbering expression ids from 0.
# Per the help text printed below, the second return value is the last id used.
program, expression_count = lispress_to_program(lispress, 0)
# Print the library's own documentation for the converter and for the Program class.
help(lispress_to_program)
help(Program)

Help on function lispress_to_program in module dataflow.core.lispress:

lispress_to_program(lispress: Union[str, List[ForwardRef('Sexp')]], idx: int) -> Tuple[dataflow.core.program.Program, int]
    Converts Lispress to a Program with ids starting at `idx`.
    Returns the last id used along with the Program.

Help on class Program in module dataflow.core.program:

class Program(builtins.object)
 |  Program(expressions: List[dataflow.core.program.Expression]) -> None
 |  
 |  Program(expressions: List[dataflow.core.program.Expression])
 |  
 |  Methods defined here:
 |  
 |  __delattr__(self, name)
 |  
 |  __eq__(self, other)
 |  
 |  __hash__(self)
 |  
 |  __init__(self, expressions: List[dataflow.core.program.Expression]) -> None
 |  
 |  __repr__(self)
 |  
 |  __setattr__(self, name, value)
 |  
 |  expressions_by_id = <cached_property.cached_property object>
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __d

In [5]:
# Look up a single expression of the program by its id string.
print(program.expressions_by_id['[1]'])
# Round trip the program through `program_to_seq` / `seq_to_program` (ids starting at 0)
# and display the result as the cell output.
seq_to_program(program_to_seq(program), 0)

Expression(id='[1]', op=BuildStructOp(op_schema='EmptyStructConstraint', op_fields=[], empty_base=True, push_go=True), type_args=[Recipient], type=None, arg_ids=[])


(Program(expressions=[Expression(id='[1]', op=BuildStructOp(op_schema='EmptyStructConstraint', op_fields=[], empty_base=True, push_go=True), type_args=[Recipient], type=None, arg_ids=[]), Expression(id='[2]', op=ValueOp(value='{"schema": "String", "underlying": "janice kang"}'), type_args=None, type=None, arg_ids=[]), Expression(id='[3]', op=BuildStructOp(op_schema='PersonName.apply', op_fields=[None], empty_base=True, push_go=True), type_args=None, type=None, arg_ids=['[2]']), Expression(id='[4]', op=BuildStructOp(op_schema='RecipientWithNameLike', op_fields=[None, None], empty_base=True, push_go=True), type_args=None, type=None, arg_ids=['[1]', '[3]']), Expression(id='[5]', op=BuildStructOp(op_schema='AttendeeListHasRecipientConstraint', op_fields=[None], empty_base=True, push_go=True), type_args=None, type=None, arg_ids=['[4]']), Expression(id='[6]', op=BuildStructOp(op_schema='Event.attendees_?', op_fields=[None], empty_base=True, push_go=True), type_args=None, type=None, arg_ids=[

In [6]:
# Load one example dialogue from the training split (one JSON document per line).
# The file is streamed line by line and reading stops at the wanted line, instead of
# materialising every dialogue in memory with readlines() just to pick one.
EXAMPLE_LINE_INDEX = 11  # zero-based line number of the dialogue used in this notebook
with open('../output/dataflow_dialogues/train.dataflow_dialogues.jsonl', 'r') as f:
    for line_index, line in enumerate(f):
        if line_index == EXAMPLE_LINE_INDEX:
            example = json.loads(line)
            break
    else:
        # Same exception type that indexing past the end of readlines() would raise.
        raise IndexError(
            f"dialogue file has fewer than {EXAMPLE_LINE_INDEX + 1} lines"
        )

## Example Dialogue

Dialogues can be read from a JSON Lines file at: `./output/dataflow_dialogues/<split>.dataflow_dialogues.jsonl`

Each dialogue is one line in the file. Within a dialogue, a turn is a user–agent utterance pair:

In [7]:
# Print every turn of the example dialogue as a user/agent utterance pair.
for turn_index, turn in enumerate(example['turns']):
    user_text = turn['user_utterance']['original_text']
    agent_text = turn['agent_utterance']['original_text']
    # The agent line is indented so that it lines up under the user line.
    print(f"Turn {turn_index}: User: {user_text}\n       Agent: {agent_text}")
    

Turn 0: User: What time is my appointment with Jerri Skinner on Friday?
       Agent: I didn't find any events with Jerri Skinner.
Turn 1: User: Can you add an appointment with Jerri Skinner at 9 am?
       Agent: Is this good?
Turn 2: User: I need it to last 2 hours.
       Agent: How's this?
Turn 3: User: good, thanks.
       Agent: I've put that on your calendar.


Each dialogue has an untyped parse and a fully typed parse. If I understand correctly, the sequence-to-sequence model learns the untyped parse, since the typed one can be inferred deterministically using a "Hindley-Milner-like" type-inference algorithm. (This algorithm requires a *fully specified* **library**: the set of functions supported by the system the agent converses over.)

Their existing code **does not support** execution of a program with a library (though their structure suggests there is a deterministic process for this). It is unclear if that process is proprietary, or if there are existing systems (e.g. lisp compilers) that could handle this trivially, given a fully specified library.

Even in their current representation, they do not support implementation of a function (a library is a set of signatures).

For specifics, a library is a `Dict[str, Definition]` where keys are function names and definitions are function signatures, described here:

```
class Definition:
    """A function signature. For example,
    Definition("foo", ["T"], [("arg1", TypeName("Long")), ("arg2", TypeName("T"))], TypeName("Double"))
    would be

    T = TypeVar("T")
    def foo(arg1: Long, arg2: T) -> Double:
        pass

    in Python, and

    (def ^(T) foo (^Long arg1 ^T arg2) ^Double ???)

    in Lispress. The ??? is the "body" of the def, much like `pass` in Python.
    It's slightly easier there's always a body because that's where return
    type annotations live right now.

    This class is currently only used in type_inference.py, but we might use
    it elsewhere too."""

    name: str
    type_params: List[str]
    params: List[Tuple[str, TypeName]]
    return_type: TypeName
```

### Semantic Parse Details:

Here are the details of the above dialogue's semantic parse:

In [8]:
# Print each turn of the example dialogue together with its pretty-printed parses.
for turn_index, turn in enumerate(example['turns']):
    user_text = turn['user_utterance']['original_text']
    agent_text = turn['agent_utterance']['original_text']
    lispress_text = render_pretty(parse_lispress(turn['lispress']))

    # A falsy expansion is reported as the literal text 'None' rather than parsed.
    expansion = turn['expansion_lispress']
    if expansion:
        expansion_text = render_pretty(parse_lispress(expansion))
    else:
        expansion_text = 'None'

    print(
        f"Turn {turn_index}: \n"
        f"User: {user_text}\n"
        f"Agent: {agent_text}\n"
        f"lispress: {lispress_text}\n"
        f"expansion_lispress: {expansion_text}"
    )

Turn 0: 
User: What time is my appointment with Jerri Skinner on Friday?
Agent: I didn't find any events with Jerri Skinner.
lispress: (Yield
  (Event.start
    (singleton
      (QueryEventResponse.results
        (FindEventWrapperWithDefaults
          (EventOnDate
            (NextDOW (Friday))
            (Event.attendees_?
              (AttendeeListHasRecipientConstraint
                (RecipientWithNameLike
                  (^(Recipient) EmptyStructConstraint)
                  (PersonName.apply "Jerri Skinner"))))))))))
expansion_lispress: None
Turn 1: 
User: Can you add an appointment with Jerri Skinner at 9 am?
Agent: Is this good?
lispress: (Yield
  (CreateCommitEventWrapper
    (CreatePreflightEventWrapper
      (&
        (Event.start_? (?= (NextTime (NumberAM 9L))))
        (Event.attendees_?
          (AttendeeListHasRecipient
            (Execute
              (refer
                (extensionConstraint
                  (RecipientWithNameLike
                    (^(Re

# TODO: continue and check out execute_programs.py in MultiWoz section

## Trying out the model:

I was able to replicate their accuracy results; now I am trying a demo example directly here:

In [9]:
def simple_infer(utterance: str) -> "Lispress":
    """Translate one utterance with the `onmt_translate` CLI and parse its prediction.

    The utterance is written to a scratch source file, `onmt_translate` is run as a
    subprocess against the trained checkpoint, and the text it writes to the output
    file is parsed with `parse_lispress`.

    Args:
        utterance: the user utterance to translate. It is written to the source file
            as-is. NOTE(review): the `.src_tok` suffix suggests the model expects
            pre-tokenized text — confirm.

    Returns:
        The result of `parse_lispress` on the prediction file contents (a parsed
        Lispress object, not a raw string).

    Raises:
        subprocess.CalledProcessError: if `onmt_translate` exits with a non-zero
            status, instead of silently parsing a stale or empty output file.
    """
    src_path = "/tmp/utterance.src_tok"
    out_path = "/tmp/out.nbest"
    # "src" keeps its original position in the dict so the printed command line
    # lists the options in the same order as before.
    options = {"model": "../output/models/smcalflow.full.checkpoint_last.pt",
               "max_length": 491, "src": src_path,
               "replace_unk": True, "n_best": 1,
               "batch_size": 8, "beam_size": 10, "gpu": 1, "report_time": True,
               "output": out_path}
    # Clear any previous result first; opening in "w" mode already truncates the file.
    with open(out_path, "w"):
        pass
    with open(src_path, "w") as f:
        f.write(utterance)
    call: List[str] = ["onmt_translate"]
    for k, v in options.items():
        if isinstance(v, bool):
            # Boolean options are bare flags: emit the flag only when enabled and
            # never pass a value for it.
            if v:
                call.append("--" + k)
            continue
        call.extend(["--" + k, str(v)])
    print(" ".join(call))
    # Fail loudly if translation fails rather than reading whatever is in out_path.
    subprocess.check_call(call)
    with open(out_path, "r") as f:
        return parse_lispress(f.read())
# Pretty-print the model's prediction for the example utterance ...
print(render_pretty(simple_infer("what is my appointment with janice kang")))
# ... followed by the hand-written `lispress` parsed earlier in the notebook, for comparison.
print(render_pretty(lispress))

onmt_translate --model ../output/models/smcalflow.full.checkpoint_last.pt --max_length 491 --src /tmp/utterance.src_tok --replace_unk --n_best 1 --batch_size 8 --beam_size 10 --gpu 1 --report_time --output /tmp/out.nbest


[2021-12-10 11:04:50,077 INFO] Translating shard 0.
  var = torch.tensor(arr, dtype=self.dtype, device=device)


PRED AVG SCORE: -0.0082, PRED PPL: 1.0082
Total translation time (s): 0.246882
Average translation time (s): 0.246882
Tokens per second: 153.919859
(Yield
  :output (singleton
    (:results
      (FindEventWrapperWithDefaults
        :constraint (Constraint[Event]
          :attendees (AttendeeListHasRecipientConstraint
            :recipientConstraint (RecipientWithNameLike
              :constraint (Constraint[Recipient])
              :name #(PersonName " janice kang "))))))))
(Yield
  (Event.id
    (singleton
      (QueryEventResponse.results
        (FindEventWrapperWithDefaults
          (Event.attendees_?
            (AttendeeListHasRecipientConstraint
              (RecipientWithNameLike
                (^(Recipient) EmptyStructConstraint)
                (PersonName.apply "janice kang")))))))))


In [10]:
# Second demo utterance; note the question mark is already separated by a space.
print(render_pretty(simple_infer("do I have any meetings today before 12pm ?")))


onmt_translate --model ../output/models/smcalflow.full.checkpoint_last.pt --max_length 491 --src /tmp/utterance.src_tok --replace_unk --n_best 1 --batch_size 8 --beam_size 10 --gpu 1 --report_time --output /tmp/out.nbest


[2021-12-10 11:04:55,200 INFO] Translating shard 0.


PRED AVG SCORE: -0.0088, PRED PPL: 1.0088
Total translation time (s): 0.230049
Average translation time (s): 0.230049
Tokens per second: 160.835207


  var = torch.tensor(arr, dtype=self.dtype, device=device)


(Yield
  :output (>
    (size
      (:results
        (FindEventWrapperWithDefaults
          :constraint (EventOnDateBeforeTime
            :date (Today)
            :event (Constraint[Event])
            :time (Noon)))))
    #(Number 0)))


In [11]:
# Third demo: an event-creation request.
simple = "Can you book a meeting at 3pm this afternoon with Matt to discuss status ?"
# Hand-written guess at the expected program for `simple`.
# NOTE(review): `my_prediction` is not used in the cells visible here; only the
# model output is printed, so any comparison is done by eye.
my_prediction = """
(Yield
  (CreateCommitEventWrapper
    (CreatePreflightEventWrapper
      (&
        (Event.start_? (?= (NextTime (NumberPM 3L))))
        (Event.name_? (?= "discuss status"))
        (Event.attendees_?
          (AttendeeListHasRecipient
            (Execute
              (refer
                (extensionConstraint
                  (RecipientWithNameLike
                    (^(Recipient) EmptyStructConstraint)
                    (PersonName.apply "Matt")))))))))))
"""
# Pretty-print the model's prediction for the utterance.
print(render_pretty(simple_infer(simple)))


onmt_translate --model ../output/models/smcalflow.full.checkpoint_last.pt --max_length 491 --src /tmp/utterance.src_tok --replace_unk --n_best 1 --batch_size 8 --beam_size 10 --gpu 1 --report_time --output /tmp/out.nbest


[2021-12-10 11:05:00,165 INFO] Translating shard 0.
  var = torch.tensor(arr, dtype=self.dtype, device=device)


PRED AVG SCORE: -0.0036, PRED PPL: 1.0036
Total translation time (s): 0.496082
Average translation time (s): 0.496082
Tokens per second: 161.263718
(Yield
  :output (CreateCommitEventWrapper
    :event (CreatePreflightEventWrapper
      :constraint (Constraint[Event]
        :attendees (AttendeeListHasRecipient
          :recipient (Execute
            :intension (refer
              (extensionConstraint
                (RecipientWithNameLike
                  :constraint (Constraint[Recipient])
                  :name #(PersonName " Matt "))))))
        :location (?= #(LocationKeyphrase " 3pm "))
        :start (DateTimeConstraint :constraint (Afternoon) :date (Today))
        :subject (?= #(String " discuss status "))))))


In [12]:
## Results from training