From 3c5703ac674d81eafe2a0c789aca4e4f0a74a8fe Mon Sep 17 00:00:00 2001 From: sgilmore10 <74676073+sgilmore10@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:41:16 -0400 Subject: [PATCH] GH-38419: [MATLAB] Implement a `ClassTypeValidator` class that validates a MATLAB `cell` array contains only values of the same class type. (#38530) ### Rationale for this change Adding this `ClassTypeValidator` class is a step towards implementing the `arrow.array.ListArray.fromMATLAB()` method for creating `ListArray`s whose `ValueType` is either a numeric, boolean, string, time32, or time64 array from a MATLAB `cell` array. ### What changes are included in this PR? Added an abstract class `arrow.array.internal.list.Validator` that defines three abstract methods: 1. `validateElement(obj, element)` 2. `length = getElementLength(obj, element)` 3. `C = reshapeCellElements(obj, C)` These abstract methods will be used in `ListArray.fromMATLAB` to create `ListArray`s from MATLAB `cell` arrays. Below is a "pared-down" version of how the `fromMATLAB` algorithm will work: ```matlab function listArray = fromMATLAB(C) % Create the appropriate Validator subclass from the % first element in the cell array C validator = createListTypeValidator(C{1}); % Pre-allocate an int32 vector for the offsets. A ListArray % with numRows rows needs numRows + 1 offsets. numRows = numel(C); offsets = zeros([numRows + 1, 1], "int32"); for ii = 1:numRows cellElement = C{ii}; % Validate cellElement can be used to create % one row in the ListArray. For example, % if the first element in C was a double, verify % cellElement is also a double. validator.validateElement(cellElement); % Determine how much to increment the % last offset value by to set the offset at index ii + 1. length = validator.getElementLength(cellElement); offsets(ii + 1) = length + offsets(ii); end % Reshape the elements in cell array C so that they % can be vertically concatenated. 
C = validator.reshapeCellElements(C); % Extract the cell array elements and vertically concatenate % them into one array. Then pass this array to arrow.array(). values = vertcat(C{:}); valueArray = arrow.array(values); % Create an Int32Array from offsets offsetArray = arrow.array(offsets); listArray = arrow.array.ListArray(Values=valueArray, Offsets=offsetArray); end ``` The concrete type of the `validator` object is created based on the first element in the `cell` array `C`. We use the first element to determine what kind of `ListArray` to construct from the input `cell` array. -- Added a concrete class called `arrow.array.internal.list.ClassTypeValidator`, which inherits from `arrow.array.internal.list.Validator`: 1. `validateElement(obj, element)` - Throws an error if the element's class type does not match the expected value. 2. `length = getElementLength(obj, element)` - Returns the number of elements in the input array. 3. `C = reshapeCellElements(obj, C)` - Reshapes all elements in the `cell` array `C` to be column vectors. `ClassTypeValidator` will be used when creating `ListArray`s from MATLAB `cell` arrays containing "primitive types", such as numerics, strings, and durations. ### Are these changes tested? Yes. I added a new test class, `tClassTypeValidator.m`. ### Are there any user-facing changes? No. ### Future Directions 1. #38420 2. #38417 3. 
#38354 * Closes: #38419 Authored-by: Sarah Gilmore Signed-off-by: Kevin Gurney --- .../+internal/+list/ClassTypeValidator.m | 47 +++++++++ .../+arrow/+array/+internal/+list/Validator.m | 28 ++++++ .../arrow/array/list/tClassTypeValidator.m | 99 +++++++++++++++++++ 3 files changed, 174 insertions(+) create mode 100644 matlab/src/matlab/+arrow/+array/+internal/+list/ClassTypeValidator.m create mode 100644 matlab/src/matlab/+arrow/+array/+internal/+list/Validator.m create mode 100644 matlab/test/arrow/array/list/tClassTypeValidator.m diff --git a/matlab/src/matlab/+arrow/+array/+internal/+list/ClassTypeValidator.m b/matlab/src/matlab/+arrow/+array/+internal/+list/ClassTypeValidator.m new file mode 100644 index 0000000000000..419560b8d566b --- /dev/null +++ b/matlab/src/matlab/+arrow/+array/+internal/+list/ClassTypeValidator.m @@ -0,0 +1,47 @@ +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+
+classdef ClassTypeValidator < arrow.array.internal.list.Validator
+    %CLASSTYPEVALIDATOR Validates that cell array elements share one class type.
+    properties (SetAccess=private, GetAccess=public)
+        ClassName(1, 1) string
+    end
+
+    methods
+        function obj = ClassTypeValidator(data)
+            obj.ClassName = class(data); % class of DATA becomes the expected type
+        end
+
+        function validateElement(obj, element)
+            % Error unless ELEMENT is an instance of the class obj.ClassName.
+            if isa(element, obj.ClassName), return; end
+            id = "arrow:array:list:ClassTypeMismatch";
+            fmt = "Expected all cell array elements to have class type " + ...
+                """%s"", but encountered an element whose class type is" + ...
+                " ""%s"".";
+            error(id, compose(fmt, obj.ClassName, class(element)));
+        end
+
+        function length = getElementLength(~, element)
+            length = numel(element); % counts every value, whatever the shape
+        end
+
+        function C = reshapeCellElements(~, C)
+            toColumn = @(value) reshape(value, [], 1);
+            C = cellfun(toColumn, C, "UniformOutput", false);
+        end
+    end
+end
+
diff --git a/matlab/src/matlab/+arrow/+array/+internal/+list/Validator.m b/matlab/src/matlab/+arrow/+array/+internal/+list/Validator.m
new file mode 100644
index 0000000000000..0178632bb6dd2
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+array/+internal/+list/Validator.m
@@ -0,0 +1,28 @@
+%VALIDATOR Defines interface used to validate MATLAB cell arrays
+%can be converted into Arrow List arrays.
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef Validator
+    methods (Abstract)
+        % Throws an error if ELEMENT cannot be one row of the list array.
+        validateElement(obj, element)
+        % Returns the number of values ELEMENT contributes to the list array.
+        length = getElementLength(obj, element)
+        % Returns C with its elements reshaped for vertical concatenation.
+        C = reshapeCellElements(obj, C)
+    end
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/array/list/tClassTypeValidator.m b/matlab/test/arrow/array/list/tClassTypeValidator.m
new file mode 100644
index 0000000000000..ae6fe7c855754
--- /dev/null
+++ b/matlab/test/arrow/array/list/tClassTypeValidator.m
@@ -0,0 +1,99 @@
+%TCLASSTYPEVALIDATOR Unit tests for arrow.array.internal.list.ClassTypeValidator
+
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tClassTypeValidator < matlab.unittest.TestCase
+
+    methods (Test)
+        function Smoke(testCase)
+            import arrow.array.internal.list.ClassTypeValidator
+            validator = ClassTypeValidator("Sample Data");
+            testCase.verifyInstanceOf(validator, "arrow.array.internal.list.ClassTypeValidator");
+        end
+
+        function ClassNameGetter(testCase)
+            % Verify the ClassName getter returns the expected scalar
+            % string.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            validator = ClassTypeValidator("Sample Data");
+            testCase.verifyEqual(validator.ClassName, "string");
+        end
+
+        function ClassNameNoSetter(testCase)
+            % Verify ClassName property is not settable.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            validator = ClassTypeValidator(1);
+            fcn = @() setfield(validator, "ClassName", "duration");
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+        end
+
+        function ValidateElementNoThrow(testCase) %#ok
+            % Verify validateElement does not throw an exception
+            % if class type of the input element matches the ClassName
+            % property value.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            validator = ClassTypeValidator(1);
+            validator.validateElement(2);
+            validator.validateElement([1 2 3]);
+            validator.validateElement([1; 2; 3; 3]);
+            validator.validateElement([5 6; 7 8]);
+            validator.validateElement(double.empty(0, 1));
+        end
+
+        function ValidateElementClassTypeMismatchError(testCase)
+            % Verify validateElement throws an exception whose identifier
+            % is "arrow:array:list:ClassTypeMismatch" if the input
+            % element's class type does not match the ClassName property
+            % value.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            % validator will expect all elements to be of type double, since "1" is a double.
+            validator = ClassTypeValidator(1);
+            errorID = "arrow:array:list:ClassTypeMismatch";
+            testCase.verifyError(@() validator.validateElement("A"), errorID);
+            testCase.verifyError(@() validator.validateElement(uint8([1 2])), errorID);
+            testCase.verifyError(@() validator.validateElement(datetime(2023, 1, 1)), errorID);
+        end
+
+        function GetElementLength(testCase)
+            % Verify getElementLength returns the expected length values
+            % for the given input arrays.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            validator = ClassTypeValidator(1);
+            testCase.verifyEqual(validator.getElementLength(2), 1);
+            testCase.verifyEqual(validator.getElementLength([1 2; 3 4]), 4);
+            testCase.verifyEqual(validator.getElementLength(double.empty(1, 0)), 0);
+        end
+
+        function ReshapeCellElements(testCase)
+            % Verify reshapeCellElements reshapes all elements in the input
+            % cell array into column vectors.
+            import arrow.array.internal.list.ClassTypeValidator
+
+            validator = ClassTypeValidator(1);
+            C = {[1 2 3], [4; 5], [6 7; 8 9], double.empty(1, 0), 10};
+            actual = validator.reshapeCellElements(C);
+            expected = {[1; 2; 3], [4; 5], [6; 8; 7; 9], double.empty(0, 1), 10};
+            testCase.verifyEqual(actual, expected);
+        end
+
+    end
+
+end
\ No newline at end of file