Skip to content

Commit

Permalink
Fix issue with read_json and add tests for different file types (#156)
Browse files Browse the repository at this point in the history
* fix read_json bug and add tests

* convert tests to reference lux-datasets

* run black

* remove comments

* Update __init__.py

* Update test_pandas_coverage.py

new_df --> df

* fix tests to work with lux-datasets

* fix init

* remove lxml dependency

* remove html test

Co-authored-by: Doris Lee <dorisjunglinlee@gmail.com>
  • Loading branch information
westernguy2 and dorisjlee committed Nov 26, 2020
1 parent 8991206 commit 8e42fb8
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
4 changes: 3 additions & 1 deletion lux/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@

def setOption(overridePandas=True):
    """Point pandas' DataFrame bindings at LuxDataFrame or back at the original.

    Args:
        overridePandas: When true, every patched binding is set to
            ``LuxDataFrame``; otherwise every one is set to ``originalDF``.

    The same set of bindings is written in both directions, so disabling the
    override also undoes the ``pd.io.json._json`` patch instead of leaving the
    JSON reader bound to ``LuxDataFrame``.
    """
    frame_cls = LuxDataFrame if overridePandas else originalDF
    pd.DataFrame = frame_cls
    # NOTE(review): presumably the binding pd.read_json resolves when it builds
    # its result (it was added alongside a read_json fix) -- confirm.
    pd.io.json._json.DataFrame = frame_cls
    pd.io.parsers.DataFrame = frame_cls
    pd.core.frame.DataFrame = frame_cls

Expand Down
26 changes: 26 additions & 0 deletions tests/test_pandas_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,29 @@ def test_str_replace(global_var):
], "Metadata is lost when going from Dataframe to Series."
assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
assert series.name == "Brand", "Pandas Series original `name` property not retained."


################
# Read Tests #
################


def test_read_json(global_var):
    """Load the car dataset with ``pd.read_json`` and check its Lux metadata.

    Asserts the exact ordered list of recommendation keys and that
    ``data_type_lookup`` holds 10 entries. ``global_var`` is not used in the
    body (presumably a pytest fixture -- confirm).
    """
    expected_actions = ["Correlation", "Distribution", "Occurrence", "Temporal"]
    df = pd.read_json(
        "https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/car.json"
    )
    # Render first; presumably this is what populates df.recommendation -- confirm.
    df._repr_html_()
    actions = list(df.recommendation.keys())
    assert actions == expected_actions
    n_typed_columns = len(df.data_type_lookup)
    assert n_typed_columns == 10


def test_read_sas(global_var):
    """Load the airline dataset with ``pd.read_sas`` and check its Lux metadata.

    Asserts the exact ordered list of recommendation keys and that
    ``data_type_lookup`` holds 6 entries. ``global_var`` is not used in the
    body (presumably a pytest fixture -- confirm).
    """
    expected_actions = ["Correlation", "Distribution", "Temporal"]
    df = pd.read_sas(
        "https://github.com/lux-org/lux-datasets/blob/master/data/airline.sas7bdat?raw=true",
        format="sas7bdat",
    )
    # Render first; presumably this is what populates df.recommendation -- confirm.
    df._repr_html_()
    actions = list(df.recommendation.keys())
    assert actions == expected_actions
    n_typed_columns = len(df.data_type_lookup)
    assert n_typed_columns == 6

0 comments on commit 8e42fb8

Please sign in to comment.