From 662b724bbdfa2b0a101f8efeaa1060e76559a0aa Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Tue, 12 Nov 2019 17:09:05 -0800 Subject: [PATCH 1/6] Modified size variable in GetUnicodeTX to -1 --- src/NativeBridge/DataViewInterop.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/NativeBridge/DataViewInterop.h b/src/NativeBridge/DataViewInterop.h index 9d9f19f1..658b0ce1 100644 --- a/src/NativeBridge/DataViewInterop.h +++ b/src/NativeBridge/DataViewInterop.h @@ -240,6 +240,7 @@ class DataSourceBlock if (bp::extract(str(s).encode("utf_8")).check()) { + size = -1; missing = -1; pch = bp::extract(str(s).encode("utf_8")); #if _MSC_VER From 04725ef68428cee0643604fe0f27eb2a81364966 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Tue, 12 Nov 2019 17:12:38 -0800 Subject: [PATCH 2/6] Update DataViewInterop.h --- src/NativeBridge/DataViewInterop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/NativeBridge/DataViewInterop.h b/src/NativeBridge/DataViewInterop.h index 658b0ce1..b4b6f15e 100644 --- a/src/NativeBridge/DataViewInterop.h +++ b/src/NativeBridge/DataViewInterop.h @@ -240,7 +240,7 @@ class DataSourceBlock if (bp::extract(str(s).encode("utf_8")).check()) { - size = -1; + size = -1; missing = -1; pch = bp::extract(str(s).encode("utf_8")); #if _MSC_VER From b72479ee88745c8f7b2e16895c5ce1454f3ab5c6 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Wed, 13 Nov 2019 12:04:53 -0800 Subject: [PATCH 3/6] Fixed spacing in DataViewInterop.h --- src/NativeBridge/DataViewInterop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/NativeBridge/DataViewInterop.h b/src/NativeBridge/DataViewInterop.h index b4b6f15e..f9e87763 100644 --- a/src/NativeBridge/DataViewInterop.h +++ b/src/NativeBridge/DataViewInterop.h @@ -240,7 +240,7 @@ class DataSourceBlock if (bp::extract(str(s).encode("utf_8")).check()) { - size = -1; + size = -1; missing = -1; pch = bp::extract(str(s).encode("utf_8")); #if _MSC_VER From 4b1f41c17449fea97337b7908293e65cfddebf89 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Wed, 13 Nov 2019 13:44:52 -0800 Subject: [PATCH 4/6] Re-enabled skipped test due to Py2.7 encoding/decoding issue --- src/python/nimbusml/tests/ensemble/test_lightgbmclassifier.py | 2 -- .../tests/feature_extraction/text/test_ngramfeaturizer.py | 2 -- .../nimbusml/tests/naive_bayes/test_naivebayesclassifier.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/src/python/nimbusml/tests/ensemble/test_lightgbmclassifier.py b/src/python/nimbusml/tests/ensemble/test_lightgbmclassifier.py index 0c31c9ff..addc22fd 100644 --- a/src/python/nimbusml/tests/ensemble/test_lightgbmclassifier.py +++ b/src/python/nimbusml/tests/ensemble/test_lightgbmclassifier.py @@ -19,8 +19,6 @@ class TestLightGbmClassifier(unittest.TestCase): - @unittest.skipIf(platform.system() in ("Linux", "Darwin") and six.PY2, - "encoding/decoding issues with linux py2.7, bug 286536") def test_lightgbmclassifier(self): np.random.seed(0) train_file = get_dataset('wiki_detox_train').as_filepath() diff --git a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py index 6b183b91..2f3ddd9a 100644 --- a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py +++ b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py @@ -18,8 +18,6 @@ class TestNGramFeaturizer(unittest.TestCase): - @unittest.skipIf(os.name != "nt" and six.PY2, - "encoding/decoding issues with linux py2.7, bug 286536") def test_ngramfeaturizer(self): np.random.seed(0) train_file = get_dataset('wiki_detox_train').as_filepath() diff --git a/src/python/nimbusml/tests/naive_bayes/test_naivebayesclassifier.py b/src/python/nimbusml/tests/naive_bayes/test_naivebayesclassifier.py index 4b414c38..36d44b85 100644 --- a/src/python/nimbusml/tests/naive_bayes/test_naivebayesclassifier.py +++ b/src/python/nimbusml/tests/naive_bayes/test_naivebayesclassifier.py @@ -19,8 +19,6 @@ class TestNaiveBayesClassifier(unittest.TestCase): - @unittest.skipIf(os.name != "nt" and six.PY2, - "encoding/decoding issues with linux py2.7, bug 286536") def test_naivebayesclassifier(self): np.random.seed(0) train_file = get_dataset("wiki_detox_train").as_filepath() From e51a64b1ee1c3cb6f2f861e1b3d480976311a2d2 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Wed, 13 Nov 2019 13:45:32 -0800 Subject: [PATCH 5/6] Removed unnecessary invoking of .sum() --- .../tests/feature_extraction/text/test_ngramfeaturizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py index 2f3ddd9a..daf60bb0 100644 --- a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py +++ b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py @@ -34,7 +34,7 @@ def test_ngramfeaturizer(self): word_feature_extractor=n_gram(), vector_normalizer='None') << 'SentimentText' X_train = texttransform.fit_transform(X_train[:100]) - sum = X_train.iloc[:].sum().sum() + sum = X_train.iloc[:].sum() print(sum) assert_equal(sum, 30513, "sum of all features is incorrect!") From ddcea89da810f09c0c8d384f1068487247ed4366 Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Wed, 13 Nov 2019 14:01:26 -0800 Subject: [PATCH 6/6] Revert "Removed unnecessary invoking of .sum()" This reverts commit e51a64b1ee1c3cb6f2f861e1b3d480976311a2d2. --- .../tests/feature_extraction/text/test_ngramfeaturizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py index daf60bb0..2f3ddd9a 100644 --- a/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py +++ b/src/python/nimbusml/tests/feature_extraction/text/test_ngramfeaturizer.py @@ -34,7 +34,7 @@ def test_ngramfeaturizer(self): word_feature_extractor=n_gram(), vector_normalizer='None') << 'SentimentText' X_train = texttransform.fit_transform(X_train[:100]) - sum = X_train.iloc[:].sum() + sum = X_train.iloc[:].sum().sum() print(sum) assert_equal(sum, 30513, "sum of all features is incorrect!")