From cad43cb6a48255d18e409e310d649a8f41be8fe6 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 22 Jun 2022 08:33:56 -0500 Subject: [PATCH 1/2] Add typeparser to v2. --- src/awkward/_typeparser/parser.py | 405 ++++++++++++++++++++++++--- src/awkward/_v2/types/__init__.py | 2 + src/awkward/_v2/types/recordtype.py | 5 +- src/awkward/types.py | 2 +- tests/v2/test_0773-typeparser.py | 415 ++++++++++++++++++++++++++++ 5 files changed, 779 insertions(+), 50 deletions(-) create mode 100644 tests/v2/test_0773-typeparser.py diff --git a/src/awkward/_typeparser/parser.py b/src/awkward/_typeparser/parser.py index c4506e6e71..4e60d83db5 100644 --- a/src/awkward/_typeparser/parser.py +++ b/src/awkward/_typeparser/parser.py @@ -1,6 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -# v2: keep this file, but change the Type-generation to generate v2 Types. +# v2: keep this file, but drop the two *_v1 functions import awkward as ak @@ -33,14 +33,17 @@ def false(self, s): return False -def toast(ptnode, high_level, categorical): +def toast_v1(ptnode, highlevel, categorical): if ptnode.__class__.__name__ == "Token": return ptnode.value + elif ptnode.data == "start": - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "input": assert len(ptnode.children) == 1 - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "predefined_typestr": if ptnode.children[0] == "string": parms = {"__array__": "string"} @@ -79,7 +82,8 @@ def toast(ptnode, high_level, categorical): typestr="bytes", ) else: - raise Exception(f"Unhandled typestring {ptnode.children[0]}") + raise AssertionError(f"unhandled typestring {ptnode.children[0]}") + elif ptnode.data == "primitive": if len(ptnode.children) == 1: parms = {} @@ -87,22 +91,24 @@ def toast(ptnode, high_level, categorical): parms.update({"__categorical__": True}) categorical = False return ak.types.PrimitiveType( - toast(ptnode.children[0], high_level, False), parameters=parms + toast_v1(ptnode.children[0], highlevel, False), parameters=parms ) elif len(ptnode.children) == 2: - parms = toast(ptnode.children[1], high_level, False) + parms = toast_v1(ptnode.children[1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.PrimitiveType( - toast(ptnode.children[0], high_level, categorical), + toast_v1(ptnode.children[0], highlevel, categorical), parms, ) else: - raise Exception("Unhandled PrimitiveType node") + raise AssertionError("unhandled PrimitiveType node") + elif ptnode.data == "categories": - assert high_level is True - return toast(ptnode.children[0], high_level, True) + assert highlevel is True + return toast_v1(ptnode.children[0], highlevel, True) + elif ptnode.data == "unknown": if len(ptnode.children) == 0: parms = {} @@ -111,33 +117,38 @@ def toast(ptnode, high_level, categorical): categorical = False return ak.types.UnknownType(parameters=parms) elif len(ptnode.children) == 1: - parms = toast(ptnode.children[0], high_level, False) + parms = toast_v1(ptnode.children[0], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.UnknownType(parameters=parms) else: - raise Exception("Unhandled UnknownType node") + raise AssertionError("unhandled UnknownType node") + elif ptnode.data == "listtype": - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "list_single": parms = {} if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.ListType( - toast(ptnode.children[0], high_level, False), parameters=parms + toast_v1(ptnode.children[0], highlevel, False), parameters=parms ) + elif ptnode.data == "list_parm": - parms = toast(ptnode.children[1], high_level, False) + parms = toast_v1(ptnode.children[1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.ListType( - toast(ptnode.children[0], high_level, categorical), parms + toast_v1(ptnode.children[0], highlevel, categorical), parms ) + elif ptnode.data == "uniontype": - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "union_single": parms = {} if categorical: @@ -145,47 +156,54 @@ def toast(ptnode, high_level, categorical): categorical = False content_list = [] for node in ptnode.children: - content_list.append(toast(node, high_level, False)) + content_list.append(toast_v1(node, highlevel, False)) return ak.types.UnionType(content_list, parameters=parms) + elif ptnode.data == "union_parm": - parms = toast(ptnode.children[-1], high_level, False) + parms = toast_v1(ptnode.children[-1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False content_list = [] for node in ptnode.children[:-1]: - content_list.append(toast(node, high_level, False)) + content_list.append(toast_v1(node, highlevel, False)) return ak.types.UnionType(content_list, parms) + elif ptnode.data == "optiontype": - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "option_single": parms = {} if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.OptionType( - toast(ptnode.children[0], high_level, False), parameters=parms + toast_v1(ptnode.children[0], highlevel, False), parameters=parms ) + elif ptnode.data == "option_parm": - parms = toast(ptnode.children[1], high_level, False) + parms = toast_v1(ptnode.children[1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.OptionType( - toast(ptnode.children[0], high_level, False), + toast_v1(ptnode.children[0], highlevel, False), parameters=parms, ) + elif ptnode.data == "option_highlevel": - assert high_level + assert highlevel parms = {} if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.OptionType( - toast(ptnode.children[0], high_level, False), parameters=parms + toast_v1(ptnode.children[0], highlevel, False), parameters=parms ) + elif ptnode.data == "record": - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "record_tuple": parms = {} if categorical: @@ -193,8 +211,9 @@ def toast(ptnode, high_level, categorical): categorical = False content_list = [] for node in ptnode.children: - content_list.append(toast(node, high_level, categorical)) + content_list.append(toast_v1(node, highlevel, categorical)) return ak.types.RecordType(tuple(content_list), parameters=parms) + elif ptnode.data == "record_dict": parms = {} if categorical: @@ -204,19 +223,23 @@ def toast(ptnode, high_level, categorical): content_keys = [] for i in range(0, len(ptnode.children), 2): content_keys.append(ptnode.children[i]) - content_types.append(toast(ptnode.children[i + 1], high_level, categorical)) + content_types.append( + toast_v1(ptnode.children[i + 1], highlevel, categorical) + ) return ak.types.RecordType(content_types, content_keys, parameters=parms) + elif ptnode.data == "record_tuple_param": - parms = toast(ptnode.children[-1], high_level, False) + parms = toast_v1(ptnode.children[-1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False content_list = [] for node in ptnode.children[:-1]: - content_list.append(toast(node, high_level, False)) + content_list.append(toast_v1(node, highlevel, False)) return ak.types.RecordType(tuple(content_list), parameters=parms) + elif ptnode.data == "record_struct": - parms = toast(ptnode.children[-1], high_level, False) + parms = toast_v1(ptnode.children[-1], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False @@ -226,14 +249,15 @@ def toast(ptnode, high_level, categorical): if isinstance(node, str): content_keys.append(node) else: - content_list.append(toast(node, high_level, False)) + content_list.append(toast_v1(node, highlevel, False)) return ak.types.RecordType( tuple(content_list), keys=content_keys, parameters=parms, ) + elif ptnode.data == "record_highlevel": - assert high_level + assert highlevel parms = {"__record__": ptnode.children[0]} if categorical: parms.update({"__categorical__": True}) @@ -244,45 +268,334 @@ def toast(ptnode, high_level, categorical): if isinstance(node, str): content_keys.append(node) else: - content_list.append(toast(node, high_level, False)) + content_list.append(toast_v1(node, highlevel, False)) return ak.types.RecordType( tuple(content_list), keys=content_keys, parameters=parms, ) + elif ptnode.data == "regular": assert (len(ptnode.children)) == 1 - return toast(ptnode.children[0], high_level, categorical) + return toast_v1(ptnode.children[0], highlevel, categorical) + elif ptnode.data == "regular_inparm": assert len(ptnode.children) == 2 - if high_level: + if highlevel: return ak.types.ArrayType( - toast(ptnode.children[1], high_level, categorical), ptnode.children[0] + toast_v1(ptnode.children[1], highlevel, categorical), ptnode.children[0] ) return ak.types.RegularType( - toast(ptnode.children[1], high_level, categorical), ptnode.children[0] + toast_v1(ptnode.children[1], highlevel, categorical), ptnode.children[0] ) + elif ptnode.data == "regular_outparm": assert len(ptnode.children) == 3 - parms = toast(ptnode.children[2], high_level, False) + parms = toast_v1(ptnode.children[2], highlevel, False) if categorical: parms.update({"__categorical__": True}) categorical = False return ak.types.RegularType( - toast(ptnode.children[1], high_level, False), + toast_v1(ptnode.children[1], highlevel, False), + ptnode.children[0], + parms, + ) + + elif ptnode.data == "def_option": + assert len(ptnode.children) == 1 + return ptnode.children[0] + + elif ptnode.data == "options": + assert len(ptnode.children) == 1 + return toast_v1(ptnode.children[0], highlevel, categorical) + + else: + raise AssertionError("unhandled node") + + +def from_datashape_v1(typestr, highlevel=False): + parseobj = Lark_StandAlone(transformer=TreeToJson()) + return toast_v1(parseobj.parse(typestr), highlevel, False) + + +def toast(ptnode, highlevel, categorical): + if ptnode.__class__.__name__ == "Token": + return ptnode.value + + elif ptnode.data == "start": + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "input": + assert len(ptnode.children) == 1 + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "predefined_typestr": + if ptnode.children[0] == "string": + parms = {"__array__": "string"} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.ListType( + ak._v2.types.NumpyType( + "uint8", parameters={"__array__": "char"}, typestr="char" + ), + parameters=parms, + typestr="string", + ) + elif ptnode.children[0] == "char": + parms = {"__array__": "char"} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="char") + elif ptnode.children[0] == "byte": + parms = {"__array__": "byte"} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.NumpyType("uint8", parameters=parms, typestr="byte") + elif ptnode.children[0] == "bytes": + parms = {"__array__": "bytestring"} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.ListType( + ak._v2.types.NumpyType( + "uint8", parameters={"__array__": "byte"}, typestr="byte" + ), + parameters=parms, + typestr="bytes", + ) + else: + raise AssertionError(f"unhandled typestring {ptnode.children[0]}") + + elif ptnode.data == "primitive": + if len(ptnode.children) == 1: + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.NumpyType( + toast(ptnode.children[0], highlevel, False), parameters=parms + ) + elif len(ptnode.children) == 2: + parms = toast(ptnode.children[1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.NumpyType( + toast(ptnode.children[0], highlevel, categorical), + parms, + ) + else: + raise AssertionError("unhandled NumpyType node") + + elif ptnode.data == "categories": + assert highlevel is True + return toast(ptnode.children[0], highlevel, True) + + elif ptnode.data == "unknown": + if len(ptnode.children) == 0: + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.UnknownType(parameters=parms) + elif len(ptnode.children) == 1: + parms = toast(ptnode.children[0], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.UnknownType(parameters=parms) + else: + raise AssertionError("unhandled UnknownType node") + + elif ptnode.data == "listtype": + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "list_single": + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.ListType( + toast(ptnode.children[0], highlevel, False), parameters=parms + ) + + elif ptnode.data == "list_parm": + parms = toast(ptnode.children[1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.ListType( + toast(ptnode.children[0], highlevel, categorical), parms + ) + + elif ptnode.data == "uniontype": + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "union_single": + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + for node in ptnode.children: + content_list.append(toast(node, highlevel, False)) + return ak._v2.types.UnionType(content_list, parameters=parms) + + elif ptnode.data == "union_parm": + parms = toast(ptnode.children[-1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + for node in ptnode.children[:-1]: + content_list.append(toast(node, highlevel, False)) + return ak._v2.types.UnionType(content_list, parms) + + elif ptnode.data == "optiontype": + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "option_single": + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.OptionType( + toast(ptnode.children[0], highlevel, False), parameters=parms + ) + + elif ptnode.data == "option_parm": + parms = toast(ptnode.children[1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.OptionType( + toast(ptnode.children[0], highlevel, False), + parameters=parms, + ) + + elif ptnode.data == "option_highlevel": + assert highlevel + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.OptionType( + toast(ptnode.children[0], highlevel, False), parameters=parms + ) + + elif ptnode.data == "record": + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "record_tuple": + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + for node in ptnode.children: + content_list.append(toast(node, highlevel, categorical)) + return ak._v2.types.RecordType(content_list, None, parameters=parms) + + elif ptnode.data == "record_dict": + parms = {} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_types = [] + content_keys = [] + for i in range(0, len(ptnode.children), 2): + content_keys.append(ptnode.children[i]) + content_types.append(toast(ptnode.children[i + 1], highlevel, categorical)) + return ak._v2.types.RecordType(content_types, content_keys, parameters=parms) + + elif ptnode.data == "record_tuple_param": + parms = toast(ptnode.children[-1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + for node in ptnode.children[:-1]: + content_list.append(toast(node, highlevel, False)) + return ak._v2.types.RecordType(content_list, None, parameters=parms) + + elif ptnode.data == "record_struct": + parms = toast(ptnode.children[-1], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + content_keys = [] + for node in ptnode.children[:-1]: + if isinstance(node, str): + content_keys.append(node) + else: + content_list.append(toast(node, highlevel, False)) + return ak._v2.types.RecordType( + content_list, + content_keys, + parameters=parms, + ) + + elif ptnode.data == "record_highlevel": + assert highlevel + parms = {"__record__": ptnode.children[0]} + if categorical: + parms.update({"__categorical__": True}) + categorical = False + content_list = [] + content_keys = [] + for node in ptnode.children[1:]: + if isinstance(node, str): + content_keys.append(node) + else: + content_list.append(toast(node, highlevel, False)) + return ak._v2.types.RecordType( + content_list, + content_keys, + parameters=parms, + ) + + elif ptnode.data == "regular": + assert (len(ptnode.children)) == 1 + return toast(ptnode.children[0], highlevel, categorical) + + elif ptnode.data == "regular_inparm": + assert len(ptnode.children) == 2 + if highlevel: + return ak._v2.types.ArrayType( + toast(ptnode.children[1], highlevel, categorical), ptnode.children[0] + ) + return ak._v2.types.RegularType( + toast(ptnode.children[1], highlevel, categorical), ptnode.children[0] + ) + + elif ptnode.data == "regular_outparm": + assert len(ptnode.children) == 3 + parms = toast(ptnode.children[2], highlevel, False) + if categorical: + parms.update({"__categorical__": True}) + categorical = False + return ak._v2.types.RegularType( + toast(ptnode.children[1], highlevel, False), ptnode.children[0], parms, ) + elif ptnode.data == "def_option": assert len(ptnode.children) == 1 return ptnode.children[0] + elif ptnode.data == "options": assert len(ptnode.children) == 1 - return toast(ptnode.children[0], high_level, categorical) + return toast(ptnode.children[0], highlevel, categorical) + else: - raise Exception("Unhandled node") + raise AssertionError("unhandled node") -def from_datashape(typestr, high_level=False): +def from_datashape(typestr, highlevel=False): parseobj = Lark_StandAlone(transformer=TreeToJson()) - return toast(parseobj.parse(typestr), high_level, False) + return toast(parseobj.parse(typestr), highlevel, False) diff --git a/src/awkward/_v2/types/__init__.py b/src/awkward/_v2/types/__init__.py index 23274a155d..c137398824 100644 --- a/src/awkward/_v2/types/__init__.py +++ b/src/awkward/_v2/types/__init__.py @@ -1,5 +1,7 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +from awkward._typeparser.parser import from_datashape # noqa: F401 + from awkward._v2.types.type import Type # noqa: F401 from awkward._v2.types.unknowntype import UnknownType # noqa: F401 from awkward._v2.types.numpytype import NumpyType # noqa: F401 diff --git a/src/awkward/_v2/types/recordtype.py b/src/awkward/_v2/types/recordtype.py index 826ded62ba..96652bbf3b 100644 --- a/src/awkward/_v2/types/recordtype.py +++ b/src/awkward/_v2/types/recordtype.py @@ -1,5 +1,6 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +import json from collections.abc import Iterable import awkward as ak @@ -103,9 +104,7 @@ def _str(self, indent, compact): pairs = [] for k, v in zip(self._fields, children): if ak._v2._prettyprint.is_identifier.match(k) is None: - key_str = repr(k) - if key_str.startswith("u"): - key_str = key_str[1:] + key_str = json.dumps(k) else: key_str = k pairs.append([key_str, ": "] + v) diff --git a/src/awkward/types.py b/src/awkward/types.py index fa3d4bd428..7932c47882 100644 --- a/src/awkward/types.py +++ b/src/awkward/types.py @@ -3,7 +3,7 @@ # v2: change to pull in classes from src/awkward/_v2/types/*.py. # Typeparser -from awkward._typeparser.parser import from_datashape +from awkward._typeparser.parser import from_datashape_v1 as from_datashape # Types from awkward._ext import Type diff --git a/tests/v2/test_0773-typeparser.py b/tests/v2/test_0773-typeparser.py new file mode 100644 index 0000000000..01aa897e36 --- /dev/null +++ b/tests/v2/test_0773-typeparser.py @@ -0,0 +1,415 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import awkward as ak # noqa: F401 + + +def test_primitive_1(): + text = "int64" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.NumpyType) + assert (str(parsedtype)) == text + + +def test_primitive_2(): + text = 'int64[parameters={"wonky": ["parameter", 3.14]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.NumpyType) + assert (str(parsedtype)) == text + + +def test_unknown_1(): + text = "unknown" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.UnknownType) + assert (str(parsedtype)) == text + + +def test_unknown_2(): + text = 'unknown[parameters={"wonky": ["parameter", 3.14]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.UnknownType) + assert str(parsedtype) == text + + +def test_record_tuple_1(): + text = "(int64)" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_tuple_2(): + text = '(int64[parameters={"wonky": ["bla", 1, 2]}])' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_tuple_3(): + text = '(int64, int64[parameters={"wonky": ["bla", 1, 2]}])' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_dict_1(): + text = '{"1": int64}' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_dict_2(): + text = '{"bla": int64[parameters={"wonky": ["bla", 1, 2]}]}' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}]}' + + +def test_record_dict_3(): + text = '{"bla": int64[parameters={"wonky": ["bla", 1, 2]}], "foo": int64}' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}], foo: int64}' + + +def test_record_parmtuple_1(): + text = 'tuple[[int64[parameters={"xkcd": [11, 12, 13]}]], parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_parmtuple_2(): + text = 'tuple[[int64, int64], parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == text + + +def test_record_struct_1(): + text = 'struct[["1"], [int64[parameters={"xkcd": [11, 12, 13]}]], parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}]}, parameters={"wonky": ["bla", 1, 2]}]' + + +def test_record_struct_2(): + text = 'struct[["1", "2"], [int64[parameters={"xkcd": [11, 12, 13]}], int64], parameters={"wonky": ["bla", 1, 2]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == "struct[{\"1\": int64[parameters={\"xkcd\": [11, 12, 13]}], \"2\": int64}, parameters={\"wonky\": [\"bla\", 1, 2]}]" + + +def test_option_numpy_1(): + text = "?int64" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_numpy_2(): + text = '?int64[parameters={"wonky": [1, 2, 3]}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_numpy_1_parm(): + text = 'option[int64, parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_numpy_2_parm(): + text = 'option[int64[parameters={"wonky": [1, 2]}], parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_unknown_1(): + text = "?unknown" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_unknown_2(): + text = '?unknown[parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_unknown_1_parm(): + text = 'option[unknown, parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_option_unknown_2_parm(): + text = 'option[unknown, parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.OptionType) + assert str(parsedtype) == text + + +def test_regular_numpy_1(): + text = "5 * int64" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RegularType) + assert str(parsedtype) == text + + +def test_regular_numpy_2(): + text = '5 * int64[parameters={"bar": "foo"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RegularType) + assert str(parsedtype) == text + + +def test_regular_numpy_2_parm(): + text = '[0 * int64[parameters={"foo": "bar"}], parameters={"bla": "bloop"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RegularType) + assert str(parsedtype) == text + + +def test_regular_unknown_1_parm(): + text = '[0 * unknown, parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.RegularType) + assert str(parsedtype) == text + + +def test_list_numpy_1(): + text = "var * float64" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.ListType) + assert str(parsedtype) == text + + +def test_list_numpy_1_parm(): + text = '[var * float64[parameters={"wonky": "boop"}], parameters={"foo": "bar"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.ListType) + assert str(parsedtype) == text + + +def test_union_numpy_empty_1(): + text = 'union[float64[parameters={"wonky": "boop"}], unknown]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.UnionType) + assert str(parsedtype) == text + + +def test_union_numpy_empty_1_parm(): + text = 'union[float64[parameters={"wonky": "boop"}], unknown, parameters={"pratyush": "das"}]' + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.UnionType) + assert str(parsedtype) == text + + +def test_arraytype_string(): + text = str(ak._v2.Array([["one", "two", "three"], [], ["four", "five"]]).type) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_bytestring(): + text = str(ak._v2.Array([[b"one", b"two", b"three"], [], [b"four", b"five"]]).type) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_categorical_1(): + text = str( + ak._v2.behaviors.categorical.to_categorical(ak._v2.Array(["one", "one", "two", "three", "one", "three"])).type + ) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_categorical_2(): + text = str(ak._v2.behaviors.categorical.to_categorical(ak._v2.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3])).type) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_record_1(): + text = '3 * Thingy["x": int64, "y": float64]' + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == '3 * Thingy[x: int64, y: float64]' + + +def test_arraytype_record_2(): + text = '3 * var * Thingy["x": int64, "y": float64]' + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == '3 * var * Thingy[x: int64, y: float64]' + + +def test_arraytype_1(): + text = str(ak._v2.Array([[1, 2, 3], None, [4, 5]]).type) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_2(): + text = str( + ak.with_parameter(ak._v2.Array([[1, 2, 3], [], [4, 5]]), "wonky", "string").type + ) + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.ArrayType) + assert str(parsedtype) == text + + +def test_arraytype_3(): + text = str( + ak.with_parameter( + ak._v2.Array([[1, 2, 3], [], [4, 5]]), "wonky", {"other": "JSON"} + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_4(): + text = str( + ak.with_parameter(ak._v2.Array([[1, 2, 3], None, [4, 5]]), "wonky", "string").type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_5(): + text = str( + ak.with_parameter(ak._v2.Array([1, 2, 3, None, 4, 5]), "wonky", "string").type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_6(): + text = str(ak.with_parameter(ak._v2.Array([1, 2, 3, 4, 5]), "wonky", "string").type) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_7(): + text = str(ak._v2.Array([1, 2, 3, None, 4, 5]).type) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_8(): + text = str( + ak.with_parameter( + ak._v2.Array([{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}]), + "wonky", + "string", + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_9(): + text = str(ak._v2.Array([(1, 1.1), (2, 2.2), (3, 3.3)]).type) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_10(): + text = str( + ak.with_parameter( + ak._v2.Array([(1, 1.1), (2, 2.2), (3, 3.3)]), "wonky", "string" + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_11(): + text = str(ak._v2.Array([[(1, 1.1), (2, 2.2)], [], [(3, 3.3)]]).type) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_12(): + text = str(ak._v2.to_regular(ak._v2.Array([[1, 2], [3, 4], [5, 6]])).type) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_13(): + text = str( + ak.with_parameter( + ak._v2.to_regular(ak._v2.Array([[1, 2], [3, 4], [5, 6]])), "wonky", "string" + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_14(): + text = str( + ak.with_parameter( + ak._v2.Array([1, 2, 3, [1], [1, 2], [1, 2, 3]]), "wonky", "string" + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_15(): + text = str( + ak.with_parameter( + ak._v2.Array([1, 2, 3, None, [1], [1, 2], [1, 2, 3]]), "wonky", "string" + ).type + ) + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_16(): + text = '7 * ?union[int64, var * int64]' + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_arraytype_17(): + text = '7 * ?union[int64, var * unknown]' + parsedtype = ak._v2.types.from_datashape(text) + assert str(parsedtype) == text + + +def test_string(): + text = "string" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.ListType) + assert str(parsedtype) == text + + +def test_hardcoded(): + text = "var * string" + parsedtype = ak._v2.types.from_datashape(text) + assert isinstance(parsedtype, ak._v2.types.ListType) + assert str(parsedtype) == text + + +def test_record_highlevel(): + text = 'Thingy["x": int64, "y": float64]' + parsedtype = ak._v2.types.from_datashape(text, True) + assert isinstance(parsedtype, ak._v2.types.RecordType) + assert str(parsedtype) == 'Thingy[x: int64, y: float64]' From 61d07b930aa9f666ab6ccbe336057974d4434dc0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 Jun 2022 13:36:47 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/v2/test_0773-typeparser.py | 39 +++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/tests/v2/test_0773-typeparser.py b/tests/v2/test_0773-typeparser.py index 01aa897e36..906cc68479 100644 --- a/tests/v2/test_0773-typeparser.py +++ b/tests/v2/test_0773-typeparser.py @@ -71,7 +71,10 @@ def test_record_dict_3(): text = '{"bla": int64[parameters={"wonky": ["bla", 1, 2]}], "foo": int64}' parsedtype = ak._v2.types.from_datashape(text) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert str(parsedtype) == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}], foo: int64}' + assert ( + str(parsedtype) + == '{bla: int64[parameters={"wonky": ["bla", 1, 2]}], foo: int64}' + ) def test_record_parmtuple_1(): @@ -92,14 +95,20 @@ def test_record_struct_1(): text = 'struct[["1"], [int64[parameters={"xkcd": [11, 12, 13]}]], parameters={"wonky": ["bla", 1, 2]}]' parsedtype = ak._v2.types.from_datashape(text) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert str(parsedtype) == 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}]}, parameters={"wonky": ["bla", 1, 2]}]' + assert ( + str(parsedtype) + == 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}]}, parameters={"wonky": ["bla", 1, 2]}]' + ) def test_record_struct_2(): text = 'struct[["1", "2"], [int64[parameters={"xkcd": [11, 12, 13]}], int64], parameters={"wonky": ["bla", 1, 2]}]' parsedtype = ak._v2.types.from_datashape(text) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert str(parsedtype) == "struct[{\"1\": int64[parameters={\"xkcd\": [11, 12, 13]}], \"2\": int64}, parameters={\"wonky\": [\"bla\", 1, 2]}]" + assert ( + str(parsedtype) + == 'struct[{"1": int64[parameters={"xkcd": [11, 12, 13]}], "2": int64}, parameters={"wonky": ["bla", 1, 2]}]' + ) def test_option_numpy_1(): @@ -230,7 +239,9 @@ def test_arraytype_bytestring(): def test_arraytype_categorical_1(): text = str( - ak._v2.behaviors.categorical.to_categorical(ak._v2.Array(["one", "one", "two", "three", "one", "three"])).type + ak._v2.behaviors.categorical.to_categorical( + ak._v2.Array(["one", "one", "two", "three", "one", "three"]) + ).type ) parsedtype = ak._v2.types.from_datashape(text, True) assert isinstance(parsedtype, ak._v2.types.ArrayType) @@ -238,7 +249,11 @@ def test_arraytype_categorical_1(): def test_arraytype_categorical_2(): - text = str(ak._v2.behaviors.categorical.to_categorical(ak._v2.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3])).type) + text = str( + ak._v2.behaviors.categorical.to_categorical( + ak._v2.Array([1.1, 1.1, 2.2, 3.3, 1.1, 3.3]) + ).type + ) parsedtype = ak._v2.types.from_datashape(text, True) assert isinstance(parsedtype, ak._v2.types.ArrayType) assert str(parsedtype) == text @@ -248,14 +263,14 @@ def test_arraytype_record_1(): text = '3 * Thingy["x": int64, "y": float64]' parsedtype = ak._v2.types.from_datashape(text, True) assert isinstance(parsedtype, ak._v2.types.ArrayType) - assert str(parsedtype) == '3 * Thingy[x: int64, y: float64]' + assert str(parsedtype) == "3 * Thingy[x: int64, y: float64]" def test_arraytype_record_2(): text = '3 * var * Thingy["x": int64, "y": float64]' parsedtype = ak._v2.types.from_datashape(text, True) assert isinstance(parsedtype, ak._v2.types.ArrayType) - assert str(parsedtype) == '3 * var * Thingy[x: int64, y: float64]' + assert str(parsedtype) == "3 * var * Thingy[x: int64, y: float64]" def test_arraytype_1(): @@ -286,7 +301,9 @@ def test_arraytype_3(): def test_arraytype_4(): text = str( - ak.with_parameter(ak._v2.Array([[1, 2, 3], None, [4, 5]]), "wonky", "string").type + ak.with_parameter( + ak._v2.Array([[1, 2, 3], None, [4, 5]]), "wonky", "string" + ).type ) parsedtype = ak._v2.types.from_datashape(text) assert str(parsedtype) == text @@ -383,13 +400,13 @@ def test_arraytype_15(): def test_arraytype_16(): - text = '7 * ?union[int64, var * int64]' + text = "7 * ?union[int64, var * int64]" parsedtype = ak._v2.types.from_datashape(text) assert str(parsedtype) == text def test_arraytype_17(): - text = '7 * ?union[int64, var * unknown]' + text = "7 * ?union[int64, var * unknown]" parsedtype = ak._v2.types.from_datashape(text) assert str(parsedtype) == text @@ -412,4 +429,4 @@ def test_record_highlevel(): text = 'Thingy["x": int64, "y": float64]' parsedtype = ak._v2.types.from_datashape(text, True) assert isinstance(parsedtype, ak._v2.types.RecordType) - assert str(parsedtype) == 'Thingy[x: int64, y: float64]' + assert str(parsedtype) == "Thingy[x: int64, y: float64]"