Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 29 additions & 14 deletions src/python/tests_extended/data_frame_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def _validate_type(self, input_meta, col_type):
return
elif expected_type == np.int64 and str(col_type) == 'datetime64[ns]':
return
elif expected_type == np.uint32 and str(col_type) == 'category':
return
elif expected_type in ort_float_set and str(col_type) in pd_float_set:
return
elif expected_type in ort_int_set and str(col_type) in pd_int_set:
Expand Down Expand Up @@ -172,21 +174,34 @@ def execute(self, df, output_names=None, output_types=None, run_options=None):
"""
input_feed = self._get_input_feeds(df, self._sess);
if not output_names:
output_names = [output.name for output in self._sess._outputs_meta]
output_names = [output.name for output in self._sess._outputs_meta]

results = self._sess.run(output_names, input_feed, run_options)

df = pd.DataFrame()
for i in range(len(results)):
r = results[i].flatten()
if output_types and output_names[i] in output_types:
dtype = output_types[output_names[i]]
if dtype == np.dtype('datetime64'):
r = r.astype(np.int64)
r = [datetime.utcfromtimestamp(ts) for ts in r]
else:
r = r.astype(dtype)

df[output_names[i]] = r

for i, r in enumerate(results):
# TODO: remove this. These extra columns
# should not be in the output.
if output_names[i].startswith('mlnet.') and \
output_names[i].endswith('.unusedOutput') and \
r.shape == (1,1):
continue

r = np.split(r, r.shape[-1], axis=-1) \
if r.shape[-1] > 1 else [r]

for suffix, col in enumerate(r):
col = col.flatten()
if output_types and output_names[i] in output_types:
dtype = output_types[output_names[i]]
if dtype == np.dtype('datetime64'):
col = col.astype(np.int64)
col = [datetime.utcfromtimestamp(ts) for ts in col]
else:
col = col.astype(dtype)

col_name = output_names[i] if len(r) == 1 else \
output_names[i] + '.' + str(suffix)
df[col_name] = col

return df
111 changes: 64 additions & 47 deletions src/python/tests_extended/test_export_to_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
SsaForecaster)
from data_frame_tool import DataFrameTool as DFT

from data_frame_tool import DataFrameTool as DFT

SHOW_ONNX_JSON = False
SHOW_TRANSFORMED_RESULTS = True
Expand Down Expand Up @@ -291,54 +292,55 @@
}

EXPECTED_RESULTS = {
'AveragedPerceptronBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'AveragedPerceptronBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'CharTokenizer': {'num_cols': 424, 'cols': 0},
'ColumnConcatenator': {'num_cols': 11, 'cols': 0},
'ColumnDuplicator': {'num_cols': 7, 'cols': 0},
'ColumnSelector': {
'num_cols': 2,
'cols': [('Sepal_Width', 'Sepal_Width'), ('Sepal_Length', 'Sepal_Length')]
'cols': [('Sepal_Width', 'Sepal_Width', 'Sepal_Width.output'),
('Sepal_Length', 'Sepal_Length', 'Sepal_Length.output')]
},
#'EnsembleClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
#'EnsembleRegressor': {'cols': [('Score', 'Score')]},
'FastForestBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'FastForestRegressor': {'cols': [('Score', 'Score')]},
'FastLinearBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'FastLinearClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'FastLinearRegressor': {'cols': [('Score', 'Score')]},
'FastTreesBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'FastTreesRegressor': {'cols': [('Score', 'Score')]},
'FastTreesTweedieRegressor': {'cols': [('Score', 'Score')]},
'FastForestBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'FastForestRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'FastLinearBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'FastLinearClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'FastLinearRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'FastTreesBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'FastTreesRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'FastTreesTweedieRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'FromKey': {'num_cols': 6, 'cols': 0},
'GlobalContrastRowScaler': {'num_cols': 12, 'cols': 0},
'Handler': {'num_cols': 8, 'cols': 0},
'Indicator': {'num_cols': 7, 'cols': 0},
'KMeansPlusPlus': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LightGbmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LightGbmClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LightGbmRanker': {'cols': [('Score', 'Score')]},
'LightGbmRegressor': {'cols': [('Score', 'Score')]},
'LinearSvmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LogisticRegressionBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'LogisticRegressionClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'KMeansPlusPlus': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LightGbmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LightGbmClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LightGbmRanker': {'cols': [('Score', 'Score', 'Score.output')]},
'LightGbmRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'LinearSvmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LogisticRegressionBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LogisticRegressionClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'LpScaler': {'num_cols': 10, 'cols': 0},
'MeanVarianceScaler': {'num_cols': 5, 'cols': 0},
'MinMaxScaler': {'num_cols': 5, 'cols': 0},
'MutualInformationSelector': {'num_cols': 8, 'cols': 0},
'NGramFeaturizer': {'num_cols': 273, 'cols': 0},
'NaiveBayesClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'NaiveBayesClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'OneHotVectorizer': {'num_cols': 12, 'cols': 0},
'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)': \
{'cols': [('PredictedLabel', 'PredictedLabel')]},
{'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'OneVsRestClassifier(LinearSvmBinaryClassifier)': \
{'cols': [('PredictedLabel', 'PredictedLabel')]},
'OnlineGradientDescentRegressor': {'cols': [('Score', 'Score')]},
'OrdinaryLeastSquaresRegressor': {'cols': [('Score', 'Score')]},
{'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'OnlineGradientDescentRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'OrdinaryLeastSquaresRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'PcaTransformer': {'num_cols': 9, 'cols': 0},
'PoissonRegressionRegressor': {'cols': [('Score', 'Score')]},
'PoissonRegressionRegressor': {'cols': [('Score', 'Score', 'Score.output')]},
'PrefixColumnConcatenator': {'num_cols': 8, 'cols': 0},
'SgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'SymSgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]},
'SgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'SymSgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]},
'ToKey': {'num_cols': 11, 'cols': 0},
'TypeConverter': {'num_cols': 8, 'cols': 0},
'WordTokenizer': {'num_cols': 73, 'cols': 0}
Expand Down Expand Up @@ -449,7 +451,7 @@ def load_json(file_path):
return json.loads(content_without_comments)


def print_results(result_expected, result_onnx):
def print_results(result_expected, result_onnx, result_onnx_ort):
print("\nML.Net Output (Expected Result):")
print(result_expected)
if not isinstance(result_expected, pd.Series):
Expand All @@ -460,8 +462,12 @@ def print_results(result_expected, result_onnx):
if not isinstance(result_onnx, pd.Series):
print('Columns', result_onnx.columns)

print("\nORT Result:")
print(result_onnx_ort)
if not isinstance(result_onnx_ort, pd.Series):
print('Columns', result_onnx_ort.columns)

def validate_results(class_name, result_expected, result_onnx):
def validate_results(class_name, result_expected, result_onnx, result_ort):
if not class_name in EXPECTED_RESULTS:
raise RuntimeError("ERROR: ONNX model executed but no results specified for comparison.")

Expand All @@ -474,34 +480,43 @@ def validate_results(class_name, result_expected, result_onnx):
if len(result_onnx.columns) != num_cols:
raise RuntimeError("ERROR: The ONNX output does not contain the expected number of columns.")

col_pairs = EXPECTED_RESULTS[class_name]['cols']
if len(result_ort.columns) != num_cols:
raise RuntimeError("ERROR: The ORT output does not contain the expected number of columns.")

col_tuples = EXPECTED_RESULTS[class_name]['cols']

if isinstance(col_pairs, int):
if isinstance(col_tuples, int):
# If col_tuples is an int then slice the columns of each result
# based on the value and use those tuples for comparison
col_pairs = list(zip(result_expected.columns[col_pairs:],
result_onnx.columns[col_pairs:]))
col_tuples = list(zip(result_expected.columns[col_tuples:],
result_onnx.columns[col_tuples:],
result_ort.columns[col_tuples:]))

if not col_pairs:
if not col_tuples:
raise RuntimeError("ERROR: no columns specified for comparison of results.")

for col_pair in col_pairs:
for col_tuple in col_tuples:
try:
col_expected = result_expected.loc[:, col_pair[0]]
col_onnx = result_onnx.loc[:, col_pair[1]]
col_expected = result_expected.loc[:, col_tuple[0]]
col_onnx = result_onnx.loc[:, col_tuple[1]]
col_ort = result_ort.loc[:, col_tuple[2]]

if isinstance(col_expected.dtype, pd.api.types.CategoricalDtype):
# ONNX does not export categorical columns so convert categorical
# columns received from ML.Net back to the original values before
# the comparison.
col_expected = col_expected.astype(col_expected.dtype.categories.dtype)

pd.testing.assert_series_equal(col_expected,
col_onnx,
check_names=False,
check_exact=False,
check_dtype=True,
check_less_precise=True)
check_kwargs = {
'check_names': False,
'check_exact': False,
'check_dtype': True,
'check_less_precise': True
}

pd.testing.assert_series_equal(col_expected, col_onnx, **check_kwargs)
pd.testing.assert_series_equal(col_expected, col_ort, **check_kwargs)

except Exception as e:
print(e)
raise RuntimeError("ERROR: OnnxRunner result does not match expected result.")
Expand Down Expand Up @@ -566,15 +581,17 @@ def test_export_to_onnx(estimator, class_name):
try:
onnxrunner = OnnxRunner(model_file=onnx_path)
result_onnx = onnxrunner.fit_transform(dataset)

df_tool = DFT(onnx_path)
result_onnx1 = df_tool.execute(dataset, [])
result_ort = df_tool.execute(dataset, [])

if SHOW_TRANSFORMED_RESULTS:
print_results(result_expected, result_onnx)
print_results(result_expected, result_onnx, result_ort)

export_valid = validate_results(class_name,
result_expected,
result_onnx)
result_onnx,
result_ort)
except Exception as e:
print(e)

Expand All @@ -599,8 +616,8 @@ def test_export_to_onnx(estimator, class_name):
for entry_point in entry_points:
class_name = entry_point['NewName']

if not class_name in ['Handler']:
continue
# if not class_name in ['Handler']:
# continue

print('\n===========> %s' % class_name)

Expand Down