# OR list machine-readable comments

## Verify round-trip
```
YAML (text) from OR list [Python]
  -> Structured data (dict, list, int, float, str, Table) [Python]
    -> JSON (text) [Python -> MATLAB/Python]
      -> Structured data (dict, list, int, float, str, Table -> dict of list) [MATLAB/Python]
    <- JSON (text) [MATLAB/Python -> Python]
  <- Structured data (dict, list, int, float, str, Table) [Python]
<- YAML (text) round-trip [Python]
```

In [36]:
import io
import pprint
import re
import textwrap
import json

import ruamel.yaml
from astropy.table import Table
from pathlib import Path

In [2]:
def dedent_text(text):
    """Strip leading newlines from ``text`` and remove its common indentation.

    Leading ``"\\n"`` characters are dropped first so that a blank first line
    (typical of triple-quoted literals) does not take part in the result; the
    remaining text is then passed through ``textwrap.dedent``.
    """
    return textwrap.dedent(text.lstrip("\n"))


In [3]:
def table_constructor(loader, node):
    """YAML constructor for the ``!table`` tag.

    The scalar text of ``node`` is read as an ``ascii.fixed_width_two_line``
    table and returned as an astropy ``Table``. ``loader`` is part of the
    constructor call signature and is not used here.
    """
    return Table.read(node.value, format="ascii.fixed_width_two_line")

In [4]:
def yaml_loads(text):
    """Parse YAML ``text`` and return the loaded data.

    ``table_constructor`` is registered for the ``!table`` tag before loading,
    so values carrying that tag come back as astropy ``Table`` objects.
    """
    parser = ruamel.yaml.YAML()
    parser.Constructor.add_constructor("!table", table_constructor)
    return parser.load(text)


In [5]:
# Minimal example: a mapping whose single value is a fixed-width two-line
# table marked with the custom `!table` tag.
text = """\
test: !table |
  col1 col2
  ---- ----
     1    2
     3    4
"""
# Same setup that yaml_loads performs: register table_constructor for `!table`.
yaml = ruamel.yaml.YAML()
yaml.Constructor.add_constructor("!table", table_constructor)

# Load the example and show the resulting structure.
dat = yaml.load(text)
print(dat)

ordereddict([('test', <Table length=2>
 col1  col2
int64 int64
----- -----
    1     2
    3     4)])


In [6]:
class ORListRepresenter(ruamel.yaml.representer.RoundTripRepresenter):
    """Round-trip representer with fixed styling choices for OR list comments.

    - ``default_flow_style=False`` is always handed to the parent class.
    - Strings containing a newline are dedented and emitted with style ``|``.
    - Every sequence is represented with ``flow_style=True``.
    """

    # See https://stackoverflow.com/questions/76689402
    def __init__(self, default_style=None, default_flow_style=None, dumper=None):
        # ``default_flow_style`` is accepted for signature compatibility only;
        # the value passed on to the parent is always False.
        super().__init__(
            default_style=default_style, default_flow_style=False, dumper=dumper
        )

    def represent_str(self, s):
        """Represent ``s`` as a YAML string scalar (style ``|`` if multi-line)."""
        str_tag = "tag:yaml.org,2002:str"
        if "\n" not in s:
            return self.represent_scalar(str_tag, s)
        return self.represent_scalar(str_tag, dedent_text(s), style="|")

    def represent_sequence(self, tag, sequence, flow_style=None):
        """Represent ``sequence`` with flow style, ignoring ``flow_style``."""
        return super().represent_sequence(tag, sequence, flow_style=True)


def table_representer(dumper, data):
    """Represent ``data`` as a ``!table`` scalar with style ``|``.

    ``data`` is written in ``ascii.fixed_width_two_line`` format into an
    in-memory text buffer, and that text becomes the scalar value.
    """
    buffer = io.StringIO()
    data.write(buffer, format="ascii.fixed_width_two_line")
    table_text = buffer.getvalue()
    return dumper.represent_scalar("!table", table_text, style="|")


# Register the custom handlers on ORListRepresenter: represent_str for `str`
# values and table_representer for astropy `Table` values.
ORListRepresenter.add_representer(str, ORListRepresenter.represent_str)
ORListRepresenter.add_representer(Table, table_representer)

In [7]:
def yaml_dumps(data):
    """Dump ``data`` to a YAML text string using ``ORListRepresenter``."""
    emitter = ruamel.yaml.YAML()
    emitter.Representer = ORListRepresenter

    # YAML.dump writes to a stream, so collect the output in memory.
    buffer = io.StringIO()
    emitter.dump(data, buffer)
    return buffer.getvalue()

In [8]:
def get_comment_blocks(lines, include_49999=False):
    """Get all the comment blocks in the file as a list of dict

    A block starts at a line beginning with ``BEGIN_COMMENT, ID=<digits>``
    (whitespace around the comma and ``=`` is optional) and ends at the next
    line that is exactly ``END_COMMENT``.

    :param lines: list of text lines without line endings
    :param include_49999: if False (default), blocks whose ID is 49999 are
        skipped
    :returns: list of dict with keys ``obsid`` (int), ``idx0`` (index of the
        BEGIN_COMMENT line), ``idx1`` (index of the END_COMMENT line) and
        ``comment`` (the lines in between joined with newlines, plus a
        trailing newline)
    :raises ValueError: if a block has no terminating ``END_COMMENT`` line
    """
    # Compile once instead of handing the pattern to re.match for every line.
    begin_comment_re = re.compile(
        r"BEGIN_COMMENT \s* , \s* ID \s* = \s* (\d+)", re.VERBOSE
    )
    comment_blocks = []

    # Index of the END_COMMENT line of the most recently collected block;
    # lines before it have already been consumed as comment text.
    idx1 = 0
    for idx0, line in enumerate(lines):
        if idx0 < idx1:
            continue

        match = begin_comment_re.match(line)
        if match is None:
            continue

        obsid = int(match.group(1))
        if obsid == 49999 and not include_49999:
            continue

        try:
            idx1 = lines.index("END_COMMENT", idx0)
        except ValueError:
            # Same exception type as before, but say which block is broken.
            raise ValueError(
                f"BEGIN_COMMENT for obsid {obsid} at line index {idx0} "
                "has no matching END_COMMENT line"
            ) from None

        comment_blocks.append(
            {
                "obsid": obsid,
                "idx0": idx0,
                "idx1": idx1,
                "comment": "\n".join(lines[idx0 + 1 : idx1]) + "\n",
            }
        )

    return comment_blocks

### Read machine-readable comments as a list of strings (text)

In [26]:
%%time

# Read the whole OR list file, split it into lines, and pull out the comment
# blocks; yaml_texts keeps just the "comment" text of each block.
orlist_text = Path("example-yaml-1-2-3-4-5-6.or").read_text()
orlist_lines = orlist_text.splitlines()
comment_blocks = get_comment_blocks(orlist_lines)
yaml_texts = [comment_block["comment"] for comment_block in comment_blocks]

CPU times: user 1.23 ms, sys: 901 µs, total: 2.13 ms
Wall time: 1.42 ms


In [27]:
# Show the YAML text of the first comment block.
print(yaml_texts[0])


cycle_number: 23
sequence_number: 201454
drop_chip_si_modes: [TE_00D50, 'NULL', 'NULL', 'NULL', 'NULL', 'NULL']
acis_fp_limit: -109.0
phase_window: !table |2
                  Start                   End
  --------------------- ---------------------
  2023:185:15:55:59.750 2023:190:17:33:48.730
comment: |
  Remarks:
  Three observations are requested: conjunction with Be star in front
  (phi=0.25), conjunction with companion in front (phi=0.75) and quadrature
  (either phi=0 or phi=0.5)



In [11]:
def odict_to_dict(data):
    """Recursively replace each OrderedDict with dict

    Any ``dict`` instance (including subclasses) is rebuilt as a plain
    ``dict`` and any ``list`` instance as a plain ``list``, descending into
    their values. Every other object is returned unchanged.
    """
    if isinstance(data, dict):
        return {key: odict_to_dict(value) for key, value in data.items()}
    if isinstance(data, list):
        return [odict_to_dict(item) for item in data]
    return data

### Convert list of YAML text (one per obsid) to list of structured data

In [28]:
def get_data_structs(yaml_texts) -> list[dict]:
    """Get data structures from YAML texts as a list of dict

    Each text is parsed with ``yaml_loads``; the list of results is then
    passed through ``odict_to_dict`` to obtain plain dicts and lists.
    """
    loaded = [yaml_loads(yaml_text) for yaml_text in yaml_texts]
    return odict_to_dict(loaded)

In [34]:
%%time

# Parse every YAML comment text into a Python data structure.
data_structs = get_data_structs(yaml_texts)

CPU times: user 46.2 ms, sys: 2.36 ms, total: 48.6 ms
Wall time: 48 ms


In [37]:
# Inspect the first data structure; sort_dicts=False keeps the keys in their
# insertion order instead of sorting them.
ds0 = data_structs[0]

pprint.pprint(ds0, sort_dicts=False)

{'cycle_number': 23,
 'sequence_number': 201454,
 'drop_chip_si_modes': ['TE_00D50', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL'],
 'acis_fp_limit': -109.0,
 'phase_window': <Table length=1>
        Start                  End         
        str21                 str21        
--------------------- ---------------------
2023:185:15:55:59.750 2023:190:17:33:48.730,
 'comment': 'Remarks:\n'
            'Three observations are requested: conjunction with Be star in '
            'front\n'
            '(phi=0.25), conjunction with companion in front (phi=0.75) and '
            'quadrature\n'
            '(either phi=0 or phi=0.5)\n'}


### Encode list of data structures as a single JSON text string

This could be efficiently passed to MATLAB.

In [43]:
class TableEncoder(json.JSONEncoder):
    """JSON encoder that serializes astropy ``Table`` objects as dicts.

    A ``Table`` is encoded as ``{"DATA_TABLE": True, <colname>: <list>, ...}``
    with one list of values per column. The ``DATA_TABLE`` marker is the key
    that ``json_loads`` checks to rebuild the table.
    """

    def default(self, obj):
        if not isinstance(obj, Table):
            # Not a Table: defer to the standard JSONEncoder handling.
            return super().default(obj)

        encoded = {"DATA_TABLE": True}
        encoded.update((name, obj[name].tolist()) for name in obj.colnames)
        return encoded


In [44]:
# Encode the first data structure as indented JSON (tables via TableEncoder)
# and display it.
ds0_json = json.dumps(ds0, indent=2, cls=TableEncoder)
print(ds0_json)


{
  "cycle_number": 23,
  "sequence_number": 201454,
  "drop_chip_si_modes": [
    "TE_00D50",
    "NULL",
    "NULL",
    "NULL",
    "NULL",
    "NULL"
  ],
  "acis_fp_limit": -109.0,
  "phase_window": {
    "DATA_TABLE": true,
    "Start": [
      "2023:185:15:55:59.750"
    ],
    "End": [
      "2023:190:17:33:48.730"
    ]
  },
  "comment": "Remarks:\nThree observations are requested: conjunction with Be star in front\n(phi=0.25), conjunction with companion in front (phi=0.75) and quadrature\n(either phi=0 or phi=0.5)\n"
}


In [63]:
%%time

# Encode the full list of data structures as one compact JSON string.
json_text = json.dumps(data_structs, cls=TableEncoder)

CPU times: user 301 µs, sys: 6 µs, total: 307 µs
Wall time: 312 µs


### Decode JSON text string into a list of data structures

This would be done in MATLAB. In Python this is very fast.

In [64]:
def json_loads(text):
    """Decode JSON ``text`` into a list of dict, rebuilding tables.

    The decoded JSON is iterated as a list of dicts. Any top-level value that
    is a dict with a truthy ``"DATA_TABLE"`` entry is replaced by
    ``Table(value)``; the ``"DATA_TABLE"`` key is removed from such a dict
    (it is popped even when its value is falsy). Nested dicts below the top
    level of each data structure are not examined.
    """
    records: list[dict] = json.loads(text)
    for record in records:
        # Iterate over a snapshot of the keys because entries get reassigned.
        for name in list(record):
            candidate = record[name]
            if not isinstance(candidate, dict):
                continue
            if candidate.pop("DATA_TABLE", None):
                record[name] = Table(candidate)
    return records

In [65]:
%%time

# Decode the JSON string back into a list of data structures.
json_data_structs = json_loads(json_text)

CPU times: user 2.21 ms, sys: 8 µs, total: 2.22 ms
Wall time: 2.23 ms


In [66]:
# Display the first decoded data structure (pprint sorts dict keys by default).
pprint.pprint(json_data_structs[0])

{'acis_fp_limit': -109.0,
 'comment': 'Remarks:\n'
            'Three observations are requested: conjunction with Be star in '
            'front\n'
            '(phi=0.25), conjunction with companion in front (phi=0.75) and '
            'quadrature\n'
            '(either phi=0 or phi=0.5)\n',
 'cycle_number': 23,
 'drop_chip_si_modes': ['TE_00D50', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL'],
 'phase_window': <Table length=1>
        Start                  End         
        str21                 str21        
--------------------- ---------------------
2023:185:15:55:59.750 2023:190:17:33:48.730,
 'sequence_number': 201454}


### List of data structures back to JSON text

This would be done in MATLAB if ORviewer needs to update any machine-readable comments.

In [68]:
# Re-encode the decoded data structures as JSON text.
json_text_rt = json.dumps(json_data_structs, cls=TableEncoder)

### JSON text back to list of data structures (one per obsid)

This would be done in Python.

In [69]:
# Decode the re-encoded JSON text into data structures once more.
json_data_structures_rt = json_loads(json_text_rt)


In [70]:
# Display the first data structure after the JSON round trip.
pprint.pprint(json_data_structures_rt[0])


{'acis_fp_limit': -109.0,
 'comment': 'Remarks:\n'
            'Three observations are requested: conjunction with Be star in '
            'front\n'
            '(phi=0.25), conjunction with companion in front (phi=0.75) and '
            'quadrature\n'
            '(either phi=0 or phi=0.5)\n',
 'cycle_number': 23,
 'drop_chip_si_modes': ['TE_00D50', 'NULL', 'NULL', 'NULL', 'NULL', 'NULL'],
 'phase_window': <Table length=1>
        Start                  End         
        str21                 str21        
--------------------- ---------------------
2023:185:15:55:59.750 2023:190:17:33:48.730,
 'sequence_number': 201454}


In [22]:
# Dump the first data structure that went through the JSON round trip back to
# YAML text. The input is json_data_structures_rt[0] (from the json_loads cell
# above); the name `mrc0_rt` used here previously is not defined anywhere in
# this notebook, so the cell raised NameError on a fresh run.
mrc0_rt_yaml = yaml_dumps(json_data_structures_rt[0])
print(mrc0_rt_yaml)

cycle_number: 23
sequence_number: 201454
drop_chip_si_modes: [TE_00D50, 'NULL', 'NULL', 'NULL', 'NULL', 'NULL']
acis_fp_limit: -109.0
phase_window: !table |2
                  Start                   End
  --------------------- ---------------------
  2023:185:15:55:59.750 2023:190:17:33:48.730
comment: |
  Remarks:
  Three observations are requested: conjunction with Be star in front
  (phi=0.25), conjunction with companion in front (phi=0.75) and quadrature
  (either phi=0 or phi=0.5)



In [23]:
# Show the original comment text of the first block for comparison.
print(comment_blocks[0]["comment"])


cycle_number: 23
sequence_number: 201454
drop_chip_si_modes: [TE_00D50, 'NULL', 'NULL', 'NULL', 'NULL', 'NULL']
acis_fp_limit: -109.0
phase_window: !table |2
                  Start                   End
  --------------------- ---------------------
  2023:185:15:55:59.750 2023:190:17:33:48.730
comment: |
  Remarks:
  Three observations are requested: conjunction with Be star in front
  (phi=0.25), conjunction with companion in front (phi=0.75) and quadrature
  (either phi=0 or phi=0.5)



In [24]:
# Round-trip check: compare the original comment text with the re-dumped YAML,
# ignoring leading and trailing newlines.
comment_blocks[0]["comment"].strip("\n") == mrc0_rt_yaml.strip("\n")


True