From fda5d3bbfbffafe49f7f7fcf729cdfa06d92a042 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Tue, 24 Nov 2020 17:45:17 -0800 Subject: [PATCH 01/10] fix read_json bug and add tests --- lux/core/__init__.py | 5 ++++- tests/test_pandas_coverage.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lux/core/__init__.py b/lux/core/__init__.py index 2d72ffb8..4605c1b5 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -22,9 +22,12 @@ def setOption(overridePandas=True): if overridePandas: - pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame + pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame else: pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF setOption(overridePandas=True) + +# global latest_history +# global diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index e224e404..02dd1374 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -560,3 +560,27 @@ def test_str_replace(global_var): ], "Metadata is lost when going from Dataframe to Series." assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." assert series.name == "Brand", "Pandas Series original `name` property not retained." + +################ +# Read Tests # +################ + +def test_read_json(global_var): + df = pd.read_json('lux/data/car.json') + df._repr_html_() + assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Occurrence', 'Temporal'] + assert len(new_df.data_type_lookup) == 10 + +def test_read_html(global_var): + df = pd.read_html('lux/data/horror.html')[1] + df._repr_html_() + assert list(new_df.recommendation.keys()) == ['Occurrence'] + assert len(new_df.data_type_lookup) == 5 + +def test_read_sas(global_var): + df = pd.read_sas('lux/data/airline.sas7bdat')[1] + df._repr_html_() + assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Temporal'] + assert len(new_df.data_type_lookup) == 6 + + From cbc38679139e32c8ddb186ab51350687a806cb38 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Tue, 24 Nov 2020 17:52:26 -0800 Subject: [PATCH 02/10] convert tests to reference lux-datasets --- tests/test_pandas_coverage.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 02dd1374..73d95d7d 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -566,19 +566,22 @@ def test_str_replace(global_var): ################ def test_read_json(global_var): - df = pd.read_json('lux/data/car.json') + url = 'https://github.com/lux-org/lux-datasets/blob/master/data/car.json?raw=true' + df = pd.read_csv(url) df._repr_html_() assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Occurrence', 'Temporal'] assert len(new_df.data_type_lookup) == 10 def test_read_html(global_var): - df = pd.read_html('lux/data/horror.html')[1] + url = 'https://github.com/lux-org/lux-datasets/blob/master/data/horror.html?raw=true' + df = pd.read_html(url)[1] df._repr_html_() assert list(new_df.recommendation.keys()) == ['Occurrence'] assert len(new_df.data_type_lookup) == 5 def test_read_sas(global_var): - df = pd.read_sas('lux/data/airline.sas7bdat')[1] + url = 'https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true' + df = pd.read_sas(url) df._repr_html_() assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Temporal'] assert len(new_df.data_type_lookup) == 6 From ad3ff758de398149166647e04906a50ad5fb8c7b Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Tue, 24 Nov 2020 17:53:25 -0800 Subject: [PATCH 03/10] run black --- lux/core/__init__.py | 6 ++++-- tests/test_pandas_coverage.py | 23 +++++++++++++++-------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lux/core/__init__.py b/lux/core/__init__.py index 4605c1b5..342f2a48 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -22,7 +22,9 @@ def setOption(overridePandas=True): if overridePandas: - pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame + pd.DataFrame = ( + pd.io.json._json.DataFrame + ) = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame else: pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF @@ -30,4 +32,4 @@ def setOption(overridePandas=True): setOption(overridePandas=True) # global latest_history -# global +# global diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 73d95d7d..e65cf191 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -561,29 +561,36 @@ def test_str_replace(global_var): assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." assert series.name == "Brand", "Pandas Series original `name` property not retained." + ################ # Read Tests # ################ + def test_read_json(global_var): - url = 'https://github.com/lux-org/lux-datasets/blob/master/data/car.json?raw=true' + url = "https://github.com/lux-org/lux-datasets/blob/master/data/car.json?raw=true" df = pd.read_csv(url) df._repr_html_() - assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Occurrence', 'Temporal'] + assert list(new_df.recommendation.keys()) == [ + "Correlation", + "Distribution", + "Occurrence", + "Temporal", + ] assert len(new_df.data_type_lookup) == 10 + def test_read_html(global_var): - url = 'https://github.com/lux-org/lux-datasets/blob/master/data/horror.html?raw=true' + url = "https://github.com/lux-org/lux-datasets/blob/master/data/horror.html?raw=true" df = pd.read_html(url)[1] df._repr_html_() - assert list(new_df.recommendation.keys()) == ['Occurrence'] + assert list(new_df.recommendation.keys()) == ["Occurrence"] assert len(new_df.data_type_lookup) == 5 + def test_read_sas(global_var): - url = 'https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true' + url = "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true" df = pd.read_sas(url) df._repr_html_() - assert list(new_df.recommendation.keys()) == ['Correlation', 'Distribution', 'Temporal'] + assert list(new_df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] assert len(new_df.data_type_lookup) == 6 - - From e8a2ec4cd9dfa62f58b35e44004465dfe83d5513 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Tue, 24 Nov 2020 17:55:19 -0800 Subject: [PATCH 04/10] remove comments --- lux/core/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/lux/core/__init__.py b/lux/core/__init__.py index 342f2a48..23585503 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -30,6 +30,3 @@ def setOption(overridePandas=True): setOption(overridePandas=True) - -# global latest_history -# global From ef282a5b12d11761f3d9ab35e64faa494a7e0df4 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Wed, 25 Nov 2020 15:16:37 +0800 Subject: [PATCH 05/10] Update __init__.py --- lux/core/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lux/core/__init__.py b/lux/core/__init__.py index 23585503..8512a5dd 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -22,11 +22,11 @@ def setOption(overridePandas=True): if overridePandas: - pd.DataFrame = ( - pd.io.json._json.DataFrame - ) = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame + pd.DataFrame = pd.core.frame.DataFrame = LuxDataFrame + pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame else: - pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF + pd.DataFrame = pd.core.frame.DataFrame = originalDF + pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame = originalDF setOption(overridePandas=True) From 2b3a09e8bdaccda33b75215080f804e678d96a92 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Wed, 25 Nov 2020 15:19:15 +0800 Subject: [PATCH 06/10] Update test_pandas_coverage.py new_df --> df --- tests/test_pandas_coverage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index e65cf191..58671ca8 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -571,26 +571,26 @@ def test_read_json(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/car.json?raw=true" df = pd.read_csv(url) df._repr_html_() - assert list(new_df.recommendation.keys()) == [ + assert list(df.recommendation.keys()) == [ "Correlation", "Distribution", "Occurrence", "Temporal", ] - assert len(new_df.data_type_lookup) == 10 + assert len(df.data_type_lookup) == 10 def test_read_html(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/horror.html?raw=true" df = pd.read_html(url)[1] df._repr_html_() - assert list(new_df.recommendation.keys()) == ["Occurrence"] - assert len(new_df.data_type_lookup) == 5 + assert list(df.recommendation.keys()) == ["Occurrence"] + assert len(df.data_type_lookup) == 5 def test_read_sas(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true" df = pd.read_sas(url) df._repr_html_() - assert list(new_df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] - assert len(new_df.data_type_lookup) == 6 + assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] + assert len(df.data_type_lookup) == 6 From 0e073f07e570952e6a8c448984dcace045b94415 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Tue, 24 Nov 2020 23:42:16 -0800 Subject: [PATCH 07/10] fix tests to work with lux-datasets --- tests/test_pandas_coverage.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index e65cf191..5848209d 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -568,29 +568,29 @@ def test_str_replace(global_var): def test_read_json(global_var): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/car.json?raw=true" - df = pd.read_csv(url) + url = "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/car.json" + df = pd.read_json(url) df._repr_html_() - assert list(new_df.recommendation.keys()) == [ + assert list(df.recommendation.keys()) == [ "Correlation", "Distribution", "Occurrence", "Temporal", ] - assert len(new_df.data_type_lookup) == 10 + assert len(df.data_type_lookup) == 10 def test_read_html(global_var): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/horror.html?raw=true" + url = "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/horror.html" df = pd.read_html(url)[1] df._repr_html_() - assert list(new_df.recommendation.keys()) == ["Occurrence"] - assert len(new_df.data_type_lookup) == 5 + assert list(df.recommendation.keys()) == ["Occurrence"] + assert len(df.data_type_lookup) == 5 def test_read_sas(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true" - df = pd.read_sas(url) + df = pd.read_sas(url, format="sas7bdat") df._repr_html_() - assert list(new_df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] - assert len(new_df.data_type_lookup) == 6 + assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"] + assert len(df.data_type_lookup) == 6 From b4c5d373843d40f927c133c8dfa42ea82e8907a2 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Wed, 25 Nov 2020 00:04:06 -0800 Subject: [PATCH 08/10] fix init --- lux/core/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lux/core/__init__.py b/lux/core/__init__.py index 8512a5dd..23585503 100644 --- a/lux/core/__init__.py +++ b/lux/core/__init__.py @@ -22,11 +22,11 @@ def setOption(overridePandas=True): if overridePandas: - pd.DataFrame = pd.core.frame.DataFrame = LuxDataFrame - pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame + pd.DataFrame = ( + pd.io.json._json.DataFrame + ) = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = LuxDataFrame else: - pd.DataFrame = pd.core.frame.DataFrame = originalDF - pd.DataFrame = pd.io.json._json.DataFrame = pd.io.parsers.DataFrame = originalDF + pd.DataFrame = pd.io.parsers.DataFrame = pd.core.frame.DataFrame = originalDF setOption(overridePandas=True) From 426cf176d720a805dbc796fd21feb2f92bfca76a Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Wed, 25 Nov 2020 14:36:26 -0800 Subject: [PATCH 09/10] remove lxml dependency --- tests/test_pandas_coverage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 5848209d..1bd73b13 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -582,7 +582,7 @@ def test_read_json(global_var): def test_read_html(global_var): url = "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/horror.html" - df = pd.read_html(url)[1] + df = pd.read_html(url, flavor="html5lib")[1] df._repr_html_() assert list(df.recommendation.keys()) == ["Occurrence"] assert len(df.data_type_lookup) == 5 From 5c1c47f373e8c372eb309c2f52ec3bd8928554b9 Mon Sep 17 00:00:00 2001 From: Kunal Agarwal Date: Wed, 25 Nov 2020 18:58:15 -0800 Subject: [PATCH 10/10] remove html test --- tests/test_pandas_coverage.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 1bd73b13..9e08c859 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -580,14 +580,6 @@ def test_read_json(global_var): assert len(df.data_type_lookup) == 10 -def test_read_html(global_var): - url = "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/horror.html" - df = pd.read_html(url, flavor="html5lib")[1] - df._repr_html_() - assert list(df.recommendation.keys()) == ["Occurrence"] - assert len(df.data_type_lookup) == 5 - - def test_read_sas(global_var): url = "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true" df = pd.read_sas(url, format="sas7bdat")