Skip to content

Commit

Permalink
Refactor variable processing (#2071)
Browse files Browse the repository at this point in the history
* Add initial common function for processing long-form inputs

* Attempt to use new core variable processing for longform relational plots

* Create and ignore a directory for notes

* Move relational plots to use common variable processing

* Refactor establish_variables method into core

* Allow relational plots to use wide lists of lists

* Change base class to _VectorPlotter

* Add initial attempt at generalized wide data processing

* Modify tests for new intermediate wide-form data representation

* Remove relational-specific wide data processing

* Fold relplot tests under TestRelationalPlotter

* Move tests for core variable processing

* Revert test reorganization; PEP8 and clean up names

* Add tests for wide dict inputs

* Pandas compat

* Modernize numpy random usage in test fixtures

* Fix docstring and comments

* Use containment checks rather than KeyError handling

* Improve test coverage for long data and messy wide data

* Test variables from dataframe index

* Flesh out wide data docstring

* Return variables dict along with plot_data df

* First attempt at generalizing relplot inputs

* Refactor and parametrize flat variables tests

* Test at base class level

* Test relplot from wide data and long vectors

* Fix test
  • Loading branch information
mwaskom committed May 15, 2020
1 parent 3db4d5a commit 5fbb5c4
Show file tree
Hide file tree
Showing 6 changed files with 1,497 additions and 830 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -10,3 +10,4 @@ cover/
.idea/
.vscode/
.pytest_cache/
notes/
168 changes: 167 additions & 1 deletion seaborn/conftest.py
@@ -1,5 +1,7 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pytest


Expand All @@ -11,4 +13,168 @@ def close_figs():

@pytest.fixture(autouse=True)
def random_seed():
    """Seed numpy's global random state for every test.

    The seed is the sum of the code points of a fixed string, so it is
    identical on every run.
    """
    # One seeding call is enough: a preceding hard-coded ``seed(47)`` would be
    # overridden immediately and has no effect on the resulting state.
    seed = sum(map(ord, "seaborn random global"))
    np.random.seed(seed)


@pytest.fixture()
def rng():
    """Provide a ``RandomState`` whose seed is derived from a fixed string."""
    code_points = (ord(char) for char in "seaborn random object")
    return np.random.RandomState(sum(code_points))


@pytest.fixture
def wide_df(rng):
    """Build a wide-form DataFrame of normal draws.

    The frame has columns "a", "b", "c" and a 20-entry integer index
    (10, 12, ..., 48) named "wide_index".
    """
    columns = list("abc")
    # ``pd.Int64Index`` no longer exists in pandas >= 2.0; a plain ``Index``
    # with an explicit int64 dtype gives the same index on older versions too.
    index = pd.Index(np.arange(10, 50, 2), dtype="int64", name="wide_index")
    values = rng.normal(size=(len(index), len(columns)))
    return pd.DataFrame(values, index=index, columns=columns)


@pytest.fixture
def wide_array(wide_df):
    """Return the contents of ``wide_df`` as a numpy array."""
    # DataFrame.to_numpy() needs pandas >= 0.24, so np.asarray is used instead.
    values = np.asarray(wide_df)
    return values


@pytest.fixture
def flat_series(rng):
    """Build a Series named "s" of 20 normal draws.

    The index holds the integers 10..29 and is named "t".
    """
    # ``pd.Int64Index`` no longer exists in pandas >= 2.0; a plain ``Index``
    # with an explicit int64 dtype gives the same index on older versions too.
    index = pd.Index(np.arange(10, 30), dtype="int64", name="t")
    return pd.Series(rng.normal(size=20), index, name="s")


@pytest.fixture
def flat_array(flat_series):
    """Return the values of ``flat_series`` as a numpy array."""
    # Series.to_numpy() needs pandas >= 0.24, so np.asarray is used instead.
    values = np.asarray(flat_series)
    return values


@pytest.fixture
def flat_list(flat_series):
    """Return the values of ``flat_series`` as a plain Python list."""
    # Series.to_list() needs pandas >= 0.24; tolist() is the older spelling.
    values = flat_series.tolist()
    return values


@pytest.fixture(params=["series", "array", "list"])
def flat_data(rng, request):
    """Provide the same flat vector as a Series, a numpy array, or a list.

    The fixture is parametrized, so a test using it runs once per
    representation; ``request.param`` selects which one is returned.
    """
    # ``pd.Int64Index`` no longer exists in pandas >= 2.0; a plain ``Index``
    # with an explicit int64 dtype gives the same index on older versions too.
    index = pd.Index(np.arange(10, 30), dtype="int64", name="t")
    series = pd.Series(rng.normal(size=20), index, name="s")
    if request.param == "series":
        data = series
    elif request.param == "array":
        try:
            data = series.to_numpy()  # Requires pandas >= 0.24
        except AttributeError:
            # Fallback for older pandas without Series.to_numpy
            data = np.asarray(series)
    elif request.param == "list":
        try:
            data = series.to_list()  # Requires pandas >= 0.24
        except AttributeError:
            # Fallback for older pandas without Series.to_list
            data = series.tolist()
    return data


@pytest.fixture
def wide_list_of_series(rng):
    """Return two Series of different lengths with partly overlapping indexes.

    Series "a" has 20 values on index 0..19; series "b" has 10 values on
    index 5..14.
    """
    # Draw order matters for reproducibility: "a" is sampled before "b".
    first = pd.Series(rng.normal(size=20), np.arange(20), name="a")
    second = pd.Series(rng.normal(size=10), np.arange(5, 15), name="b")
    return [first, second]


@pytest.fixture
def wide_list_of_arrays(wide_list_of_series):
    """Return each Series in ``wide_list_of_series`` as a numpy array."""
    # Series.to_numpy() needs pandas >= 0.24, so np.asarray is used instead.
    return list(map(np.asarray, wide_list_of_series))


@pytest.fixture
def wide_list_of_lists(wide_list_of_series):
    """Return each Series in ``wide_list_of_series`` as a plain list."""
    # Series.to_list() needs pandas >= 0.24; tolist() is the older spelling.
    as_lists = []
    for series in wide_list_of_series:
        as_lists.append(series.tolist())
    return as_lists


@pytest.fixture
def wide_dict_of_series(wide_list_of_series):
    """Return the Series from ``wide_list_of_series`` keyed by their names."""
    by_name = {}
    for series in wide_list_of_series:
        by_name[series.name] = series
    return by_name


@pytest.fixture
def wide_dict_of_arrays(wide_list_of_series):
    """Return numpy arrays of the wide Series, keyed by Series name."""
    # Series.to_numpy() needs pandas >= 0.24, so np.asarray is used instead.
    by_name = {}
    for series in wide_list_of_series:
        by_name[series.name] = np.asarray(series)
    return by_name


@pytest.fixture
def wide_dict_of_lists(wide_list_of_series):
    """Return plain lists of the wide Series values, keyed by Series name."""
    # Series.to_list() needs pandas >= 0.24; tolist() is the older spelling.
    by_name = {}
    for series in wide_list_of_series:
        by_name[series.name] = series.tolist()
    return by_name


@pytest.fixture
def long_df(rng):
    """Build a 100-row long-form DataFrame with columns of mixed types.

    Columns cover integers (x, c, s), floats (y, f), strings (a, b), a
    constant datetime (t), and a categorical copy of ``s`` (s_cat).
    """
    n_rows = 100
    # The columns are drawn in this order so the random stream is consumed
    # identically on every run.
    columns = {
        "x": rng.uniform(0, 20, n_rows).round().astype("int"),
        "y": rng.normal(size=n_rows),
        "a": rng.choice(list("abc"), n_rows),
        "b": rng.choice(list("mnop"), n_rows),
        "c": rng.choice([0, 1], n_rows),
        "t": np.repeat(np.datetime64('2005-02-25'), n_rows),
        "s": rng.choice([2, 4, 8], n_rows),
        "f": rng.choice([0.2, 0.3], n_rows),
    }
    frame = pd.DataFrame(columns)
    frame["s_cat"] = frame["s"].astype("category")
    return frame


@pytest.fixture
def long_dict(long_df):
    """Return ``long_df`` converted with ``DataFrame.to_dict()``."""
    as_dict = long_df.to_dict()
    return as_dict


@pytest.fixture
def repeated_df(rng):
    """Build a 100-row DataFrame in which each ``x`` value occurs twice.

    ``x`` runs 0..49 two times over, and ``u`` is 0 for the first pass and
    1 for the second.
    """
    n_rows = 100
    half = n_rows // 2
    columns = {
        "x": np.tile(np.arange(half), 2),
        "y": rng.normal(size=n_rows),
        "a": rng.choice(list("abc"), n_rows),
        "u": np.repeat(np.arange(2), half),
    }
    return pd.DataFrame(columns)


@pytest.fixture
def missing_df(rng, long_df):
    """Return a copy of ``long_df`` with 10 entries per column set to NaN."""
    frame = long_df.copy()
    for column in frame.columns:
        # A fresh permutation per column picks different rows each time.
        shuffled = rng.permutation(frame.index)
        frame.loc[shuffled[:10], column] = np.nan
    return frame


@pytest.fixture
def null_series():
    """Return a float64 Series on index 0..19 built without any data.

    pandas fills a Series constructed from only an index with NaN.
    """
    index = np.arange(20)
    return pd.Series(dtype='float64', index=index)

0 comments on commit 5fbb5c4

Please sign in to comment.