From c1944a23892b91529987118fd3d36a9725347c60 Mon Sep 17 00:00:00 2001
From: Doris Lee <dorisjunglinlee@gmail.com>
Date: Wed, 6 Jan 2021 12:02:37 +0800
Subject: [PATCH 1/4] bugfix for describe and convert_dtypes

---
 lux/core/frame.py              |  2 +-
 lux/core/series.py             | 16 ++++++++--------
 lux/executor/PandasExecutor.py | 18 +++++-------------
 tests/test_nan.py              |  2 ++
 tests/test_pandas.py           | 14 ++++++++++++++
 5 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/lux/core/frame.py b/lux/core/frame.py
index e4ed9e3e..8546c168 100644
--- a/lux/core/frame.py
+++ b/lux/core/frame.py
@@ -155,7 +155,7 @@ def _set_item(self, key, value):
     def _infer_structure(self):
         # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data
         is_multi_index_flag = self.index.nlevels != 1
-        not_int_index_flag = self.index.dtype != "int64"
+        not_int_index_flag = not pd.api.types.is_integer_dtype(self.index)
         small_df_flag = len(self) < 100
         self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag
         if "Number of Records" in self.columns:
diff --git a/lux/core/series.py b/lux/core/series.py
index aea13d0c..aebcabbd 100644
--- a/lux/core/series.py
+++ b/lux/core/series.py
@@ -45,14 +45,14 @@ def _constructor(self):
     def _constructor_expanddim(self):
         from lux.core.frame import LuxDataFrame
 
-        def f(*args, **kwargs):
-            df = LuxDataFrame(*args, **kwargs)
-            for attr in self._metadata:
-                df.__dict__[attr] = getattr(self, attr, None)
-            return df
-
-        f._get_axis_number = super(LuxSeries, self)._get_axis_number
-        return f
+        # def f(*args, **kwargs):
+        #     df = LuxDataFrame(*args, **kwargs)
+        #     for attr in self._metadata:
+        #         df.__dict__[attr] = getattr(self, attr, None)
+        #     return df
+
+        # f._get_axis_number = super(LuxSeries, self)._get_axis_number
+        return LuxDataFrame
 
     def to_pandas(self):
         import lux.core
diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
index 9708d8eb..56422866 100644
--- a/lux/executor/PandasExecutor.py
+++ b/lux/executor/PandasExecutor.py
@@ -428,9 +428,7 @@ def compute_data_type(self, ldf: LuxDataFrame):
                 ldf.data_type[attr] = "temporal"
             else:
                 ldf.data_type[attr] = "nominal"
-        # for attr in list(df.dtypes[df.dtypes=="int64"].keys()):
-        #   if self.cardinality[attr]>50:
-        if ldf.index.dtype != "int64" and ldf.index.name:
+        if not pd.api.types.is_integer_dtype(ldf.index) and ldf.index.name:
             ldf.data_type[ldf.index.name] = "nominal"
 
         non_datetime_attrs = []
@@ -489,21 +487,15 @@ def compute_stats(self, ldf: LuxDataFrame):
             ldf.unique_values[attribute_repr] = list(ldf[attribute_repr].unique())
             ldf.cardinality[attribute_repr] = len(ldf.unique_values[attribute_repr])
 
-            # commenting this optimization out to make sure I can filter by cardinality when showing recommended vis
-
-            # if ldf.dtypes[attribute] != "float64":# and not pd.api.types.is_datetime64_ns_dtype(self.dtypes[attribute]):
-            #     ldf.unique_values[attribute_repr] = list(ldf[attribute].unique())
-            #     ldf.cardinality[attribute_repr] = len(ldf.unique_values[attribute])
-            # else:
-            #     ldf.cardinality[attribute_repr] = 999 # special value for non-numeric attribute
-
-            if ldf.dtypes[attribute] == "float64" or ldf.dtypes[attribute] == "int64":
+            if pd.api.types.is_float_dtype(ldf.dtypes[attribute]) or pd.api.types.is_integer_dtype(
+                ldf.dtypes[attribute]
+            ):
                 ldf._min_max[attribute_repr] = (
                     ldf[attribute].min(),
                     ldf[attribute].max(),
                 )
 
-        if ldf.index.dtype != "int64":
+        if not pd.api.types.is_integer_dtype(ldf.index):
             index_column_name = ldf.index.name
             ldf.unique_values[index_column_name] = list(ldf.index)
             ldf.cardinality[index_column_name] = len(ldf.index)
diff --git a/tests/test_nan.py b/tests/test_nan.py
index b2d28fed..1701215f 100644
--- a/tests/test_nan.py
+++ b/tests/test_nan.py
@@ -22,11 +22,16 @@
 
 def test_nan_column(global_var):
     df = pytest.college_df
+    old_geo = df["Geography"]
     df["Geography"] = np.nan
-    df._repr_html_()
-    for visList in df.recommendation.keys():
-        for vis in df.recommendation[visList]:
-            assert vis.get_attr_by_attr_name("Geography") == []
+    try:
+        df._repr_html_()
+        for visList in df.recommendation.keys():
+            for vis in df.recommendation[visList]:
+                assert vis.get_attr_by_attr_name("Geography") == []
+    finally:
+        # Restore the column even when rendering or an assertion fails, so pytest.college_df is left unchanged.
+        df["Geography"] = old_geo
 
 
 def test_nan_data_type_detection():
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index b43cc1f9..34f68605 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -44,3 +44,17 @@ def test_head_tail(global_var):
         "Lux is visualizing the previous version of the dataframe before you applied <code>tail</code>."
         in df._message.to_html()
     )
+
+
+def test_describe(global_var):
+    """The describe() summary must render with one "Column Groups" vis per column (10 in total)."""
+    summary = pytest.college_df.describe()
+    summary._repr_html_()
+    assert len(summary.recommendation["Column Groups"]) == len(summary.columns) == 10
+
+
+def test_convert_dtype(global_var):
+    """A convert_dtypes() copy must still yield the Correlation/Distribution/Occurrence tabs."""
+    converted = pytest.college_df.convert_dtypes()
+    converted._repr_html_()
+    assert list(converted.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"]

From 5c8b2849d449b1b1e0a7c0b5d57be26c926ae160 Mon Sep 17 00:00:00 2001
From: Doris Lee <dorisjunglinlee@gmail.com>
Date: Wed, 6 Jan 2021 12:08:02 +0800
Subject: [PATCH 2/4] added back metadata series test

---
 tests/test_pandas.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index 34f68605..e4935fde 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -16,16 +16,17 @@
 import pytest
 import pandas as pd
 
-# def test_df_to_series():
-#     # Ensure metadata is kept when going from df to series
-#     df = pd.read_csv("lux/data/car.csv")
-#     df._repr_html_() # compute metadata
-#     assert df.cardinality is not None
-#     series = df["Weight"]
-#     assert isinstance(series,lux.core.series.LuxSeries), "Derived series is type LuxSeries."
-#     assert df["Weight"]._metadata == ['name','_intent', 'data_type_lookup', 'data_type', 'data_model_lookup', 'data_model', 'unique_values', 'cardinality', 'min_max', '_current_vis', '_widget', '_recommendation'], "Metadata is lost when going from Dataframe to Series."
-#     assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
-#     assert series.name == "Weight", "Pandas Series original `name` property not retained."
+def test_df_to_series():
+    # Ensure metadata is kept when going from df to series
+    df = pd.read_csv("lux/data/car.csv")
+    df._repr_html_() # compute metadata
+    assert df.cardinality is not None
+    series = df["Weight"]
+    assert isinstance(series,lux.core.series.LuxSeries), "Derived series is type LuxSeries."
+    print (df["Weight"]._metadata)
+    assert df["Weight"]._metadata == ['_intent', 'data_type', 'unique_values', 'cardinality', '_rec_info', '_pandas_only', '_min_max', 'plot_config', '_current_vis', '_widget', '_recommendation', '_prev', '_history', '_saved_export', 'name'], "Metadata is lost when going from Dataframe to Series."
+    assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
+    assert series.name == "Weight", "Pandas Series original `name` property not retained."
 
 
 def test_head_tail(global_var):

From 49daeecbdc4b09b7e0013b6a9b940d7bb303e716 Mon Sep 17 00:00:00 2001
From: Doris Lee <dorisjunglinlee@gmail.com>
Date: Wed, 6 Jan 2021 12:17:39 +0800
Subject: [PATCH 3/4] Format tests/test_pandas.py with black

---
 tests/test_pandas.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index e4935fde..4b38ae1a 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -16,15 +16,32 @@
 import pytest
 import pandas as pd
 
+
 def test_df_to_series():
     # Ensure metadata is kept when going from df to series
     df = pd.read_csv("lux/data/car.csv")
-    df._repr_html_() # compute metadata
+    df._repr_html_()  # compute metadata
     assert df.cardinality is not None
     series = df["Weight"]
-    assert isinstance(series,lux.core.series.LuxSeries), "Derived series is type LuxSeries."
-    print (df["Weight"]._metadata)
-    assert df["Weight"]._metadata == ['_intent', 'data_type', 'unique_values', 'cardinality', '_rec_info', '_pandas_only', '_min_max', 'plot_config', '_current_vis', '_widget', '_recommendation', '_prev', '_history', '_saved_export', 'name'], "Metadata is lost when going from Dataframe to Series."
+    assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries."
+    print(df["Weight"]._metadata)
+    assert df["Weight"]._metadata == [
+        "_intent",
+        "data_type",
+        "unique_values",
+        "cardinality",
+        "_rec_info",
+        "_pandas_only",
+        "_min_max",
+        "plot_config",
+        "_current_vis",
+        "_widget",
+        "_recommendation",
+        "_prev",
+        "_history",
+        "_saved_export",
+        "name",
+    ], "Metadata is lost when going from Dataframe to Series."
     assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
     assert series.name == "Weight", "Pandas Series original `name` property not retained."
 

From 801b469fe5e375916f64a9abf858f0f35ff24fde Mon Sep 17 00:00:00 2001
From: Doris Lee <dorisjunglinlee@gmail.com>
Date: Wed, 6 Jan 2021 15:58:42 +0800
Subject: [PATCH 4/4] default to pandas display when df.dtypes printed

---
 lux/core/series.py   |  4 +++-
 tests/test_pandas.py | 29 ------------------------
 tests/test_series.py | 53 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 30 deletions(-)
 create mode 100644 tests/test_series.py

diff --git a/lux/core/series.py b/lux/core/series.py
index aebcabbd..1e3c4f8c 100644
--- a/lux/core/series.py
+++ b/lux/core/series.py
@@ -16,6 +16,7 @@
 import lux
 import warnings
 import traceback
+import numpy as np
 
 
 class LuxSeries(pd.Series):
@@ -75,7 +76,8 @@ def __repr__(self):
         ldf = LuxDataFrame(self)
 
         try:
-            if ldf._pandas_only:
+            is_dtype_series = all(isinstance(val, np.dtype) for val in self.values)
+            if ldf._pandas_only or is_dtype_series:
                 print(series_repr)
                 ldf._pandas_only = False
             else:
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index 4b38ae1a..26cd7333 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -17,35 +17,6 @@
 import pandas as pd
 
 
-def test_df_to_series():
-    # Ensure metadata is kept when going from df to series
-    df = pd.read_csv("lux/data/car.csv")
-    df._repr_html_()  # compute metadata
-    assert df.cardinality is not None
-    series = df["Weight"]
-    assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries."
-    print(df["Weight"]._metadata)
-    assert df["Weight"]._metadata == [
-        "_intent",
-        "data_type",
-        "unique_values",
-        "cardinality",
-        "_rec_info",
-        "_pandas_only",
-        "_min_max",
-        "plot_config",
-        "_current_vis",
-        "_widget",
-        "_recommendation",
-        "_prev",
-        "_history",
-        "_saved_export",
-        "name",
-    ], "Metadata is lost when going from Dataframe to Series."
-    assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
-    assert series.name == "Weight", "Pandas Series original `name` property not retained."
-
-
 def test_head_tail(global_var):
     df = pytest.car_df
     df._repr_html_()
diff --git a/tests/test_series.py b/tests/test_series.py
new file mode 100644
index 00000000..62a4697f
--- /dev/null
+++ b/tests/test_series.py
@@ -0,0 +1,53 @@
+#  Copyright 2019-2020 The Lux Authors.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from .context import lux
+import pytest
+import pandas as pd
+import warnings
+
+
+def test_df_to_series():
+    """Selecting a column must yield a LuxSeries that declares the Lux metadata fields."""
+    df = pd.read_csv("lux/data/car.csv")
+    df._repr_html_()  # compute metadata
+    assert df.cardinality is not None
+    series = df["Weight"]
+    assert isinstance(series, lux.core.series.LuxSeries), "Derived series is not a LuxSeries."
+    # Order matters: the comparison below is against the exact list, not a set.
+    assert series._metadata == [
+        "_intent",
+        "data_type",
+        "unique_values",
+        "cardinality",
+        "_rec_info",
+        "_pandas_only",
+        "_min_max",
+        "plot_config",
+        "_current_vis",
+        "_widget",
+        "_recommendation",
+        "_prev",
+        "_history",
+        "_saved_export",
+        "name",
+    ], "Metadata is lost when going from Dataframe to Series."
+    assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
+    assert series.name == "Weight", "Pandas Series original `name` property not retained."
+
+
+def test_print_dtypes(global_var):
+    """Printing ``df.dtypes`` must not raise any warning."""
+    with warnings.catch_warnings(record=True) as caught:
+        print(pytest.college_df.dtypes)
+        assert len(caught) == 0, "Warning displayed when printing dtypes"