lux-org · dorisjlee · Jan 7, 2021 · Jan 6, 2021 · Jan 6, 2021 · Jan 6, 2021
diff --git a/lux/core/frame.py b/lux/core/frame.py
@@ -155,7 +155,7 @@ def _set_item(self, key, value):
     def _infer_structure(self):
         # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data
         is_multi_index_flag = self.index.nlevels != 1
-        not_int_index_flag = self.index.dtype != "int64"
+        not_int_index_flag = not pd.api.types.is_integer_dtype(self.index)
         small_df_flag = len(self) < 100
         self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag
         if "Number of Records" in self.columns:

diff --git a/lux/core/series.py b/lux/core/series.py
@@ -16,6 +16,7 @@
 import lux
 import warnings
 import traceback
+import numpy as np
 
 
 class LuxSeries(pd.Series):
@@ -45,14 +46,10 @@ def _constructor(self):
     def _constructor_expanddim(self):
         from lux.core.frame import LuxDataFrame
 
-        def f(*args, **kwargs):
-            df = LuxDataFrame(*args, **kwargs)
-            for attr in self._metadata:
-                df.__dict__[attr] = getattr(self, attr, None)
-            return df
-
-        f._get_axis_number = super(LuxSeries, self)._get_axis_number
-        return f
+        # Hand pandas the LuxDataFrame class itself instead of a wrapper function.
+        # NOTE(review): unlike the removed wrapper, the series' `_metadata` attributes are
+        # no longer copied onto the expanded frame -- confirm this is intended.
+        return LuxDataFrame
 
     def to_pandas(self):
         import lux.core
@@ -75,7 +72,12 @@ def __repr__(self):
         ldf = LuxDataFrame(self)
 
         try:
-            if ldf._pandas_only:
+            # A series made up entirely of dtype objects (e.g. `df.dtypes`) gets the plain pandas repr.
+            # Extension dtypes (categorical, nullable integer, string, ...) are not np.dtype
+            # instances, so they are accepted explicitly. `all()` is also True for an empty series.
+            dtype_classes = (np.dtype, pd.api.extensions.ExtensionDtype)
+            is_dtype_series = all(isinstance(val, dtype_classes) for val in self.values)
+            if ldf._pandas_only or is_dtype_series:
                 print(series_repr)
                 ldf._pandas_only = False
             else:

diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
@@ -428,9 +428,7 @@ def compute_data_type(self, ldf: LuxDataFrame):
                 ldf.data_type[attr] = "temporal"
             else:
                 ldf.data_type[attr] = "nominal"
-        # for attr in list(df.dtypes[df.dtypes=="int64"].keys()):
-        #   if self.cardinality[attr]>50:
-        if ldf.index.dtype != "int64" and ldf.index.name:
+        if not pd.api.types.is_integer_dtype(ldf.index) and ldf.index.name:
             ldf.data_type[ldf.index.name] = "nominal"
 
         non_datetime_attrs = []
@@ -489,21 +487,15 @@ def compute_stats(self, ldf: LuxDataFrame):
             ldf.unique_values[attribute_repr] = list(ldf[attribute_repr].unique())
             ldf.cardinality[attribute_repr] = len(ldf.unique_values[attribute_repr])
 
-            # commenting this optimization out to make sure I can filter by cardinality when showing recommended vis
-
-            # if ldf.dtypes[attribute] != "float64":# and not pd.api.types.is_datetime64_ns_dtype(self.dtypes[attribute]):
-            #     ldf.unique_values[attribute_repr] = list(ldf[attribute].unique())
-            #     ldf.cardinality[attribute_repr] = len(ldf.unique_values[attribute])
-            # else:
-            #     ldf.cardinality[attribute_repr] = 999 # special value for non-numeric attribute
-
-            if ldf.dtypes[attribute] == "float64" or ldf.dtypes[attribute] == "int64":
+            if pd.api.types.is_float_dtype(ldf.dtypes[attribute]) or pd.api.types.is_integer_dtype(
+                ldf.dtypes[attribute]
+            ):
                 ldf._min_max[attribute_repr] = (
                     ldf[attribute].min(),
                     ldf[attribute].max(),
                 )
 
-        if ldf.index.dtype != "int64":
+        if not pd.api.types.is_integer_dtype(ldf.index):
             index_column_name = ldf.index.name
             ldf.unique_values[index_column_name] = list(ldf.index)
             ldf.cardinality[index_column_name] = len(ldf.index)
diff --git a/tests/test_nan.py b/tests/test_nan.py
@@ -22,11 +22,17 @@
 
 def test_nan_column(global_var):
     df = pytest.college_df
+    old_geo = df["Geography"]
     df["Geography"] = np.nan
-    df._repr_html_()
-    for visList in df.recommendation.keys():
-        for vis in df.recommendation[visList]:
-            assert vis.get_attr_by_attr_name("Geography") == []
+    try:
+        df._repr_html_()
+        for visList in df.recommendation.keys():
+            for vis in df.recommendation[visList]:
+                assert vis.get_attr_by_attr_name("Geography") == []
+    finally:
+        # Restore the column even when an assertion above fails, so that other tests
+        # reading pytest.college_df do not see the all-NaN "Geography" column.
+        df["Geography"] = old_geo
 
 
 def test_nan_data_type_detection():

diff --git a/tests/test_pandas.py b/tests/test_pandas.py
@@ -16,17 +16,6 @@
 import pytest
 import pandas as pd
 
-# def test_df_to_series():
-#     # Ensure metadata is kept when going from df to series
-#     df = pd.read_csv("lux/data/car.csv")
-#     df._repr_html_() # compute metadata
-#     assert df.cardinality is not None
-#     series = df["Weight"]
-#     assert isinstance(series,lux.core.series.LuxSeries), "Derived series is type LuxSeries."
-#     assert df["Weight"]._metadata == ['name','_intent', 'data_type_lookup', 'data_type', 'data_model_lookup', 'data_model', 'unique_values', 'cardinality', 'min_max', '_current_vis', '_widget', '_recommendation'], "Metadata is lost when going from Dataframe to Series."
-#     assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
-#     assert series.name == "Weight", "Pandas Series original `name` property not retained."
-
 
 def test_head_tail(global_var):
     df = pytest.car_df
@@ -44,3 +33,19 @@ def test_head_tail(global_var):
         "Lux is visualizing the previous version of the dataframe before you applied <code>tail</code>."
         in df._message.to_html()
     )
+
+
+def test_describe(global_var):
+    """Rendering the `describe()` summary yields one "Column Groups" vis per summary column."""
+    summary = pytest.college_df.describe()
+    summary._repr_html_()
+    column_groups = summary.recommendation["Column Groups"]
+    assert len(column_groups) == len(summary.columns) == 10
+
+
+def test_convert_dtype(global_var):
+    """A `convert_dtypes()` frame recommends exactly Correlation, Distribution, Occurrence (in order)."""
+    converted = pytest.college_df.convert_dtypes()
+    converted._repr_html_()
+    expected_actions = ["Correlation", "Distribution", "Occurrence"]
+    assert list(converted.recommendation.keys()) == expected_actions
diff --git a/tests/test_series.py b/tests/test_series.py
@@ -0,0 +1,53 @@
+#  Copyright 2019-2020 The Lux Authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from .context import lux
+import pytest
+import pandas as pd
+import warnings
+
+
+def test_df_to_series():
+    """Selecting a column yields a LuxSeries that keeps the Lux metadata fields and its name."""
+    df = pd.read_csv("lux/data/car.csv")
+    df._repr_html_()  # compute metadata
+    assert df.cardinality is not None
+    series = df["Weight"]
+    assert isinstance(series, lux.core.series.LuxSeries), "Derived series is not of type LuxSeries."
+    assert series._metadata == [
+        "_intent",
+        "data_type",
+        "unique_values",
+        "cardinality",
+        "_rec_info",
+        "_pandas_only",
+        "_min_max",
+        "plot_config",
+        "_current_vis",
+        "_widget",
+        "_recommendation",
+        "_prev",
+        "_history",
+        "_saved_export",
+        "name",
+    ], "Metadata is lost when going from Dataframe to Series."
+    assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
+    assert series.name == "Weight", "Pandas Series original `name` property not retained."
+
+
+def test_print_dtypes(global_var):
+    """Printing `df.dtypes` must not record any warnings."""
+    df = pytest.college_df
+    with warnings.catch_warnings(record=True) as w:
+        print(df.dtypes)
+        assert len(w) == 0, "Warning displayed when printing dtypes"