## Preface

## Record Schema

```typescript
export interface RecordSchema {
    /** Discriminator for this schema; the only permitted value is `record`. */
    type: "record";

    /** Defines the fields of the record. May be omitted. */
    fields?: Array<RecordField>;
}
```

## Record Field

```typescript
export interface RecordField {
    /** Optional short label for the field. */
    label?: string;

    /** The name of the field. */
    name: string;

    /** A documentation string for this field. */
    doc?: string;

    /**
     * The field type: either a single type (a primitive type name, a nested
     * record/enum/array schema, or a plain string) or an array of such types.
     */
    type:
        | PrimitiveType
        | RecordSchema
        | EnumSchema
        | ArraySchema
        | string
        | Array<PrimitiveType | RecordSchema | EnumSchema | ArraySchema | string>;
}
```

## Primitive Type

```typescript
/**
 * Salad data types are based on Avro schema declarations. Refer to the
 * [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas)
 * for detailed information.
 *
 * - `null`: no value
 * - `boolean`: a binary value
 * - `int`: 32-bit signed integer
 * - `long`: 64-bit signed integer
 * - `float`: single precision (32-bit) IEEE 754 floating-point number
 * - `double`: double precision (64-bit) IEEE 754 floating-point number
 * - `string`: Unicode character sequence
 */
export type PrimitiveType =
    | "null"
    | "boolean"
    | "int"
    | "long"
    | "float"
    | "double"
    | "string";
```

## Goals

Determine if the *type* is:
  * optional
  * an array

Then determine which *type* we're dealing with.

What do we do if the type value is an `$import`? Do we assume it refers to a second record schema and use the label attribute to assign the type?

Do we need to generate a TS interface for the imported schema first? Could this work recursively?




In [2]:
from pathlib import Path
import json
from cwl_utils.parser_v1_1 import RecordSchema, load_document_by_yaml
from ruamel.yaml.comments import CommentedMap as OrderedDict
from ruamel import yaml

from cwl_utils.graph_split import rewrite_schemadef, rewrite
from pprint import pprint

In [3]:
CWL_FILE_PATH = Path("../schemas/samplesheet/1.0.0/samplesheet__1.0.0.yaml")

# TODO - pull in multiple CWL File paths here so we can see the differences in complexities


In [36]:
import cwltool

In [88]:
def import_cwl_yaml(cwl_file_path):
    """Load a CWL schema YAML file and wrap the parsed document in a RecordSchema.

    Args:
        cwl_file_path: Location of the YAML file; handed straight to ``open``.

    Returns:
        A ``RecordSchema`` constructed with the parsed YAML document as its
        single positional argument.

    NOTE(review): elsewhere in this notebook a mapping passed to
    ``RecordSchema`` this way ends up under the object's ``type`` attribute
    while ``fields`` stays ``None`` -- confirm that this is the intended way
    to build the object.
    """
    # Parse with ruamel's round-trip loader (preserve_quotes=True).
    with open(cwl_file_path, "r") as yaml_stream:
        parsed_document = yaml.main.round_trip_load(yaml_stream, preserve_quotes=True)

    return RecordSchema(parsed_document)

In [92]:
with open(CWL_FILE_PATH, "r") as cwl_h:
    yaml_obj = yaml.main.round_trip_load(cwl_h, preserve_quotes=True)

In [93]:
yaml_obj

ordereddict([('type', 'record'), ('name', 'samplesheet'), ('fields', ordereddict([('header', ordereddict([('label', 'samplesheet header'), ('doc', 'The samplesheet header object\n'), ('type', [ordereddict([('$import', '../../../schemas/samplesheet-header/1.0.0/samplesheet-header__1.0.0.yaml#samplesheet-header')])])])), ('reads', ordereddict([('label', 'reads'), ('doc', 'The reads\n'), ('type', [ordereddict([('$import', '../../../schemas/samplesheet-reads/1.0.0/samplesheet-reads__1.0.0.yaml#samplesheet-reads')])])])), ('bclconvert_settings', ordereddict([('label', 'BCLConvert Settings section'), ('doc', 'The bclconvert settings used for demux\n'), ('type', [ordereddict([('$import', '../../../schemas/bclconvert-settings/3.10.5/bclconvert-settings__3.10.5.yaml#bclconvert-settings')])])])), ('bclconvert_data', ordereddict([('label', 'BCLConvert Data section'), ('doc', 'The array of bclconvert data objects\n'), ('type', ordereddict([('type', 'array'), ('items', ordereddict([('$import', '../

In [95]:
pprint(yaml_obj)

{'fields': {'bclconvert_data': {'doc': 'The array of bclconvert data objects\n',
                                'label': 'BCLConvert Data section',
                                'type': {'items': {'$import': '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row'},
                                         'type': 'array'}},
            'bclconvert_settings': {'doc': 'The bclconvert settings used for '
                                           'demux\n',
                                    'label': 'BCLConvert Settings section',
                                    'type': [ordereddict([('$import', '../../../schemas/bclconvert-settings/3.10.5/bclconvert-settings__3.10.5.yaml#bclconvert-settings')])]},
            'header': {'doc': 'The samplesheet header object\n',
                       'label': 'samplesheet header',
                       'type': [ordereddict([('$import', '../../../schemas/samplesheet-header/1.0.0/samplesheet-header__1.0.0

In [97]:
z = RecordSchema(yaml_obj.get("fields"))

In [103]:
z.__dict__

{'extension_fields': ordereddict(),
 'loadingOptions': <cwl_utils.parser_v1_1.LoadingOptions at 0x7f8302e39dc0>,
 'fields': None,
 'type': ordereddict([('header', ordereddict([('label', 'samplesheet header'), ('doc', 'The samplesheet header object\n'), ('type', [ordereddict([('$import', '../../../schemas/samplesheet-header/1.0.0/samplesheet-header__1.0.0.yaml#samplesheet-header')])])])), ('reads', ordereddict([('label', 'reads'), ('doc', 'The reads\n'), ('type', [ordereddict([('$import', '../../../schemas/samplesheet-reads/1.0.0/samplesheet-reads__1.0.0.yaml#samplesheet-reads')])])])), ('bclconvert_settings', ordereddict([('label', 'BCLConvert Settings section'), ('doc', 'The bclconvert settings used for demux\n'), ('type', [ordereddict([('$import', '../../../schemas/bclconvert-settings/3.10.5/bclconvert-settings__3.10.5.yaml#bclconvert-settings')])])])), ('bclconvert_data', ordereddict([('label', 'BCLConvert Data section'), ('doc', 'The array of bclconvert data objects\n'), ('type', o

In [109]:
# Dump every entry of the mapping stored in z.type for inspection.
# z was built from yaml_obj.get("fields"), and that mapping is what z.type holds.
for key, value_dict in dict(z.type).items():
    print(f"Key: {key}")
    print("Value:")
    pprint(value_dict, indent=4)
    print(f"Value type:")
    # Show only the "type" entry of this field definition.
    pprint(value_dict.get("type"))
    print("\n")

Key: header
Value:
{   'doc': 'The samplesheet header object\n',
    'label': 'samplesheet header',
    'type': [ordereddict([('$import', '../../../schemas/samplesheet-header/1.0.0/samplesheet-header__1.0.0.yaml#samplesheet-header')])]}
Value type:
[ordereddict([('$import', '../../../schemas/samplesheet-header/1.0.0/samplesheet-header__1.0.0.yaml#samplesheet-header')])]


Key: reads
Value:
{   'doc': 'The reads\n',
    'label': 'reads',
    'type': [ordereddict([('$import', '../../../schemas/samplesheet-reads/1.0.0/samplesheet-reads__1.0.0.yaml#samplesheet-reads')])]}
Value type:
[ordereddict([('$import', '../../../schemas/samplesheet-reads/1.0.0/samplesheet-reads__1.0.0.yaml#samplesheet-reads')])]


Key: bclconvert_settings
Value:
{   'doc': 'The bclconvert settings used for demux\n',
    'label': 'BCLConvert Settings section',
    'type': [ordereddict([('$import', '../../../schemas/bclconvert-settings/3.10.5/bclconvert-settings__3.10.5.yaml#bclconvert-settings')])]}
Value type:
[orde

In [81]:
dir(x)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'attrs',
 'extension_fields',
 'fields',
 'fromDoc',
 'loadingOptions',
 'save',
 'type']

In [86]:
x.type

ordereddict([('type', 'record'), ('name', 'samplesheet'), ('fields', ordereddict([('header', ordereddict([('label', 'samplesheet header'), ('doc', 'The samplesheet header object\n'), ('type', [ordereddict([('$import', '.')])])])), ('reads', ordereddict([('label', 'reads'), ('doc', 'The reads\n'), ('type', [ordereddict([('$import', '.')])])])), ('bclconvert_settings', ordereddict([('label', 'BCLConvert Settings section'), ('doc', 'The bclconvert settings used for demux\n'), ('type', [ordereddict([('$import', '.')])])])), ('bclconvert_data', ordereddict([('label', 'BCLConvert Data section'), ('doc', 'The array of bclconvert data objects\n'), ('type', ordereddict([('type', 'array'), ('items', ordereddict([('$import', '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row')]))]))]))]))])

In [79]:
x.save()

ordereddict([('type', {'type': 'record', 'name': 'samplesheet', 'fields': {'header': {'label': 'samplesheet header', 'doc': 'The samplesheet header object\n', 'type': [{'$import': '.'}]}, 'reads': {'label': 'reads', 'doc': 'The reads\n', 'type': [{'$import': '.'}]}, 'bclconvert_settings': {'label': 'BCLConvert Settings section', 'doc': 'The bclconvert settings used for demux\n', 'type': [{'$import': '.'}]}, 'bclconvert_data': {'label': 'BCLConvert Data section', 'doc': 'The array of bclconvert data objects\n', 'type': {'type': 'array', 'items': {'$import': '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row'}}}}})])

In [74]:
x.loadingOptions.__dict__

{'idx': {},
 'fileuri': None,
 'namespaces': None,
 'schemas': None,
 'original_doc': None,
 'fetcher': <schema_salad.fetcher.DefaultFetcher at 0x7f83082787c0>,
 'vocab': {'Any': 'https://w3id.org/cwl/salad#Any',
  'ArraySchema': 'https://w3id.org/cwl/salad#ArraySchema',
  'CWLType': 'https://w3id.org/cwl/cwl#CWLType',
  'CWLVersion': 'https://w3id.org/cwl/cwl#CWLVersion',
  'CommandInputArraySchema': 'https://w3id.org/cwl/cwl#CommandInputArraySchema',
  'CommandInputEnumSchema': 'https://w3id.org/cwl/cwl#CommandInputEnumSchema',
  'CommandInputParameter': 'https://w3id.org/cwl/cwl#CommandInputParameter',
  'CommandInputRecordField': 'https://w3id.org/cwl/cwl#CommandInputRecordField',
  'CommandInputRecordSchema': 'https://w3id.org/cwl/cwl#CommandInputRecordSchema',
  'CommandInputSchema': 'https://w3id.org/cwl/cwl#CommandInputSchema',
  'CommandLineBindable': 'https://w3id.org/cwl/cwl#CommandLineBindable',
  'CommandLineBinding': 'https://w3id.org/cwl/cwl#CommandLineBinding',
  'Comma

In [51]:
pprint(dict(x.type))

{'fields': {'bclconvert_data': {'doc': 'The array of bclconvert data objects\n',
                                'label': 'BCLConvert Data section',
                                'type': {'items': {'$import': '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row'},
                                         'type': 'array'}},
            'bclconvert_settings': {'doc': 'The bclconvert settings used for '
                                           'demux\n',
                                    'label': 'BCLConvert Settings section',
                                    'type': [ordereddict([('$import', '.')])]},
            'header': {'doc': 'The samplesheet header object\n',
                       'label': 'samplesheet header',
                       'type': [ordereddict([('$import', '.')])]},
            'reads': {'doc': 'The reads\n',
                      'label': 'reads',
                      'type': [ordereddict([('$import', '.')])]}},
 '

In [18]:
load_document_by_yaml(x, CWL_FILE_PATH.absolute().as_uri())

ValidationException: - tried _RecordLoader but
  Not a CommandLineTool
- tried _RecordLoader but
  Not a ExpressionTool
- tried _RecordLoader but
  Not a Workflow
- tried _ArrayLoader but
  Expected a list

In [38]:
from cwltool import schemas

In [45]:
from typing import (
    IO,
    Any,
    Callable,
    Dict,
    Generator,
    Iterable,
    List,
    MutableMapping,
    MutableSequence,
    NamedTuple,
    Optional,
    Set,
    Tuple,
    Union,
    cast,
)


In [46]:
def visit_field(rec: Any, field: str, op: Callable[..., Any]) -> None:
    """Rewrite, in place, every value stored under ``field`` inside ``rec``.

    The structure is walked recursively through mutable mappings and mutable
    sequences. Whenever a mapping contains the key ``field``, its value is
    replaced by ``op(value)``; the walk then continues into all values of
    that mapping, including the one that was just replaced.

    Args:
        rec: Arbitrarily nested data; anything that is neither a mutable
            mapping nor a mutable sequence is left untouched.
        field: Key whose values should be transformed.
        op: Callable receiving the current value and returning its replacement.

    Returns:
        None. All changes are made directly on ``rec``.
    """
    is_mapping = isinstance(rec, MutableMapping)
    if is_mapping and field in rec:
        rec[field] = op(rec[field])
    if is_mapping:
        for key in rec:
            visit_field(rec[key], field, op)
    if isinstance(rec, MutableSequence):
        for element in rec:
            visit_field(element, field, op)

In [52]:
visit_field(x.type.get("fields"), "bclconvert_data")

TypeError: visit_field() missing 1 required positional argument: 'op'

In [59]:
y = x.type

assert y.get("type") == "record"

In [61]:
pprint(y.get("fields"))

{'bclconvert_data': {'doc': 'The array of bclconvert data objects\n',
                     'label': 'BCLConvert Data section',
                     'type': {'items': {'$import': '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row'},
                              'type': 'array'}},
 'bclconvert_settings': {'doc': 'The bclconvert settings used for demux\n',
                         'label': 'BCLConvert Settings section',
                         'type': [ordereddict([('$import', '.')])]},
 'header': {'doc': 'The samplesheet header object\n',
            'label': 'samplesheet header',
            'type': [ordereddict([('$import', '.')])]},
 'reads': {'doc': 'The reads\n',
           'label': 'reads',
           'type': [ordereddict([('$import', '.')])]}}


In [67]:
# Print each entry of the record's "fields" mapping: the key, then its definition.
for field_key, field_dict in y.get("fields").items():
    print(f"Field key {field_key}")
    print(f"Field dict:")
    pprint(field_dict, indent=4)

Field key header
Field dict:
{   'doc': 'The samplesheet header object\n',
    'label': 'samplesheet header',
    'type': [ordereddict([('$import', '.')])]}
Field key reads
Field dict:
{   'doc': 'The reads\n',
    'label': 'reads',
    'type': [ordereddict([('$import', '.')])]}
Field key bclconvert_settings
Field dict:
{   'doc': 'The bclconvert settings used for demux\n',
    'label': 'BCLConvert Settings section',
    'type': [ordereddict([('$import', '.')])]}
Field key bclconvert_data
Field dict:
{   'doc': 'The array of bclconvert data objects\n',
    'label': 'BCLConvert Data section',
    'type': {   'items': {   '$import': '../../../schemas/bclconvert-data-row/3.10.5/bclconvert-data-row__3.10.5.yaml#bclconvert-data-row'},
                'type': 'array'}}


In [68]:
y.get("fields").get("bclconvert_settings")

ordereddict([('label', 'BCLConvert Settings section'), ('doc', 'The bclconvert settings used for demux\n'), ('type', [ordereddict([('$import', '.')])])])

In [69]:
y.get("fields").get("bclconvert_settings").get("type")

[ordereddict([('$import', '.')])]