Skip to content

Commit

Permalink
Mixed dtype issue (#205)
Browse files Browse the repository at this point in the history
* coalesce data_types into data_type_lookup

* merge fixed

* merge conflicts

* add warning and suggestion on how to fix

* formatting for warnings version

* change to internal data

* legibility update

* test added

* update test

* test updated

* xlrd in dev reqs

* black

* update link

* changes to test logic, minor string format for warning

Co-authored-by: Doris Lee <dorisjunglinlee@gmail.com>
  • Loading branch information
jinimukh and dorisjlee committed Jan 9, 2021
1 parent e1430df commit 14c141b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
18 changes: 14 additions & 4 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,10 +238,20 @@ def execute_aggregate(vis: Vis, isFiltered=True):
assert (
len(list(vis.data[groupby_attr.attribute])) == N_unique_vals
), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute}`."
vis._vis_data = vis.data.dropna(subset=[measure_attr.attribute])
vis._vis_data = vis.data.sort_values(by=groupby_attr.attribute, ascending=True)
vis._vis_data = vis.data.reset_index()
vis._vis_data = vis.data.drop(columns="index")

vis._vis_data = vis._vis_data.dropna(subset=[measure_attr.attribute])
try:
vis._vis_data = vis._vis_data.sort_values(by=groupby_attr.attribute, ascending=True)
except TypeError:
warnings.warn(
f"\nLux detects that the attribute '{groupby_attr.attribute}' maybe contain mixed type."
+ f"\nTo visualize this attribute, you may want to convert the '{groupby_attr.attribute}' into a uniform type as follows:"
+ f"\n\tdf['{groupby_attr.attribute}'] = df['{groupby_attr.attribute}'].astype(str)"
)
vis._vis_data[groupby_attr.attribute] = vis._vis_data[groupby_attr.attribute].astype(str)
vis._vis_data = vis._vis_data.sort_values(by=groupby_attr.attribute, ascending=True)
vis._vis_data = vis._vis_data.reset_index()
vis._vis_data = vis._vis_data.drop(columns="index")

@staticmethod
def execute_binning(vis: Vis):
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ pytest>=5.3.1
pytest-cov>=2.8.1
Sphinx>=3.0.2
sphinx-rtd-theme>=0.4.3
xlrd
black
10 changes: 10 additions & 0 deletions tests/test_pandas_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from .context import lux
import pytest
import pandas as pd
import numpy as np
import warnings

###################
# DataFrame Tests #
Expand Down Expand Up @@ -678,3 +680,11 @@ def test_read_sas(global_var):
df._repr_html_()
assert list(df.recommendation.keys()) == ["Correlation", "Distribution", "Temporal"]
assert len(df.data_type) == 6


def test_read_multi_dtype(global_var):
    """Rendering a frame read from the car-data spreadsheet warns with a fix hint.

    The test loads ``car-data.xls`` from the lux-datasets repository, renders
    the frame via ``_repr_html_()``, and expects a ``UserWarning`` whose text
    matches ``"mixed type"`` and includes the suggested ``astype(str)``
    conversion for the ``Car Type`` column.

    NOTE(review): presumably ``Car Type`` holds values of more than one Python
    type in this dataset -- confirm against the spreadsheet.
    """
    url = "https://github.com/lux-org/lux-datasets/blob/master/data/car-data.xls?raw=true"
    df = pd.read_excel(url)
    expected_hint = "df['Car Type'] = df['Car Type'].astype(str)"
    with pytest.warns(UserWarning, match="mixed type") as record:
        df._repr_html_()
    # Look through every captured warning instead of only the last one:
    # rendering may emit unrelated warnings after the mixed-type warning, and
    # indexing record[-1] would then make the test fail spuriously.
    assert any(expected_hint in str(caught.message) for caught in record)

0 comments on commit 14c141b

Please sign in to comment.