Refactor variable processing (#2071)

* Add initial common function for processing long-form inputs * Attempt to use new core variable processing for longform relational plots * Create and ignore a directory for notes * Move relational plots to use common variable processing * Refactor establish_variables method into core * Allow relational plots to use wide lists of lists * Change base class to _VectorPlotter * Add initial attempt at generalized wide data processing * Modify tests for new intermediate wide-form data representation * Remove relational-specific wide data processing * Fold relplot tests under TestRelationalPlotter * Move tests for core variable processing * Revert test reorganization; PEP8 and clean up names * Add tests for wide dict inputs * Pandas compat * Modernize numpy random usage in test fixtures * Fix docstring and comments * Use containment checks rather than KeyError handling * Improve test coverage for long data and messy wide data * Test variables from dataframe index * Flesh out wide data docstring * Return variables dict along with plot_data df * First attempt at generalizing relplot inputs * Refactor and parametrize flat variables tests * Test at base class level * Test relplot from wide data and long vectors * Fix test
mwaskom · May 15, 2020 · 5fbb5c4 · 5fbb5c4
1 parent 3db4d5a
commit 5fbb5c4
Show file tree

Hide file tree

Showing 6 changed files with 1,497 additions and 830 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,4 @@ cover/
 .idea/
 .vscode/
 .pytest_cache/
+notes/
diff --git a/seaborn/conftest.py b/seaborn/conftest.py
@@ -1,5 +1,7 @@
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
+
 import pytest
 
 
@@ -11,4 +13,168 @@ def close_figs():
 
 @pytest.fixture(autouse=True)
 def random_seed():
-    np.random.seed(47)
+    seed = sum(map(ord, "seaborn random global"))
+    np.random.seed(seed)
+
+
+@pytest.fixture()
+def rng():
+    seed = sum(map(ord, "seaborn random object"))
+    return np.random.RandomState(seed)
+
+
+@pytest.fixture
+def wide_df(rng):
+
+    columns = list("abc")
+    index = pd.Int64Index(np.arange(10, 50, 2), name="wide_index")
+    values = rng.normal(size=(len(index), len(columns)))
+    return pd.DataFrame(values, index=index, columns=columns)
+
+
+@pytest.fixture
+def wide_array(wide_df):
+
+    # Requires pandas >= 0.24
+    # return wide_df.to_numpy()
+    return np.asarray(wide_df)
+
+
+@pytest.fixture
+def flat_series(rng):
+
+    index = pd.Int64Index(np.arange(10, 30), name="t")
+    return pd.Series(rng.normal(size=20), index, name="s")
+
+
+@pytest.fixture
+def flat_array(flat_series):
+
+    # Requires pandas >= 0.24
+    # return flat_series.to_numpy()
+    return np.asarray(flat_series)
+
+
+@pytest.fixture
+def flat_list(flat_series):
+
+    # Requires pandas >= 0.24
+    # return flat_series.to_list()
+    return flat_series.tolist()
+
+
+@pytest.fixture(params=["series", "array", "list"])
+def flat_data(rng, request):
+
+    index = pd.Int64Index(np.arange(10, 30), name="t")
+    series = pd.Series(rng.normal(size=20), index, name="s")
+    if request.param == "series":
+        data = series
+    elif request.param == "array":
+        try:
+            data = series.to_numpy()  # Requires pandas >= 0.24
+        except AttributeError:
+            data = np.asarray(series)
+    elif request.param == "list":
+        try:
+            data = series.to_list()  # Requires pandas >= 0.24
+        except AttributeError:
+            data = series.tolist()
+    return data
+
+
+@pytest.fixture
+def wide_list_of_series(rng):
+
+    return [pd.Series(rng.normal(size=20), np.arange(20), name="a"),
+            pd.Series(rng.normal(size=10), np.arange(5, 15), name="b")]
+
+
+@pytest.fixture
+def wide_list_of_arrays(wide_list_of_series):
+
+    # Requires pandas >= 0.24
+    # return [s.to_numpy() for s in wide_list_of_series]
+    return [np.asarray(s) for s in wide_list_of_series]
+
+
+@pytest.fixture
+def wide_list_of_lists(wide_list_of_series):
+
+    # Requires pandas >= 0.24
+    # return [s.to_list() for s in wide_list_of_series]
+    return [s.tolist() for s in wide_list_of_series]
+
+
+@pytest.fixture
+def wide_dict_of_series(wide_list_of_series):
+
+    return {s.name: s for s in wide_list_of_series}
+
+
+@pytest.fixture
+def wide_dict_of_arrays(wide_list_of_series):
+
+    # Requires pandas >= 0.24
+    # return {s.name: s.to_numpy() for s in wide_list_of_series}
+    return {s.name: np.asarray(s) for s in wide_list_of_series}
+
+
+@pytest.fixture
+def wide_dict_of_lists(wide_list_of_series):
+
+    # Requires pandas >= 0.24
+    # return {s.name: s.to_list() for s in wide_list_of_series}
+    return {s.name: s.tolist() for s in wide_list_of_series}
+
+
+@pytest.fixture
+def long_df(rng):
+
+    n = 100
+    df = pd.DataFrame(dict(
+        x=rng.uniform(0, 20, n).round().astype("int"),
+        y=rng.normal(size=n),
+        a=rng.choice(list("abc"), n),
+        b=rng.choice(list("mnop"), n),
+        c=rng.choice([0, 1], n),
+        t=np.repeat(np.datetime64('2005-02-25'), n),
+        s=rng.choice([2, 4, 8], n),
+        f=rng.choice([0.2, 0.3], n),
+    ))
+    df["s_cat"] = df["s"].astype("category")
+    return df
+
+
+@pytest.fixture
+def long_dict(long_df):
+
+    return long_df.to_dict()
+
+
+@pytest.fixture
+def repeated_df(rng):
+
+    n = 100
+    return pd.DataFrame(dict(
+        x=np.tile(np.arange(n // 2), 2),
+        y=rng.normal(size=n),
+        a=rng.choice(list("abc"), n),
+        u=np.repeat(np.arange(2), n // 2),
+    ))
+
+
+@pytest.fixture
+def missing_df(rng, long_df):
+
+    df = long_df.copy()
+    for col in df:
+        idx = rng.permutation(df.index)[:10]
+        df.loc[idx, col] = np.nan
+    return df
+
+
+@pytest.fixture
+def null_series():
+
+    return pd.Series(index=np.arange(20), dtype='float64')