Skip to content

Commit

Permalink
ENH: Support seaborn, pt1
Browse files Browse the repository at this point in the history
  • Loading branch information
sinhrks committed Jan 9, 2016
1 parent 994197d commit bbbb9b2
Show file tree
Hide file tree
Showing 17 changed files with 695 additions and 4 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Expand Up @@ -15,6 +15,11 @@ addons:
install:
- bash scripts/build_travis.sh

before_script:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
- sleep 3 # give xvfb some time to start

script:
- export PATH="$HOME/miniconda/bin:$PATH"
- source activate myenv
Expand Down
27 changes: 27 additions & 0 deletions LICENSES/SEABORN_LICENSE
@@ -0,0 +1,27 @@
Copyright (c) 2012-2013, Michael L. Waskom
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

* Neither the name of the {organization} nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1 change: 1 addition & 0 deletions pandas_ml/__init__.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python

from pandas_ml.core import ModelFrame, ModelSeries
from pandas_ml.tools import info
from pandas_ml.version import version as __version__
1 change: 1 addition & 0 deletions pandas_ml/core/accessor.py
Expand Up @@ -41,6 +41,7 @@ def __init__(self, df, module_name=None, attrs=None):
setattr(self, mobj, getattr(self._module, mobj))
except AttributeError:
pass

except NotImplementedError:
pass

Expand Down
15 changes: 15 additions & 0 deletions pandas_ml/core/frame.py
Expand Up @@ -13,6 +13,7 @@
from pandas_ml.core.accessor import _AccessorMethods
import pandas_ml.skaccessors as skaccessors
import pandas_ml.smaccessors as smaccessors
import pandas_ml.seaborn as seaborn
import pandas_ml.xgboost as xgboost
import pandas_ml.misc as misc
import pandas_ml.util as util
Expand Down Expand Up @@ -798,6 +799,20 @@ def tree(self):
def _tree(self):
    """Build the accessor bound to the ``sklearn.tree`` module."""
    tree_accessor = _AccessorMethods(self, module_name='sklearn.tree')
    return tree_accessor

@property
def sns(self):
    """Property to access ``seaborn`` API (returns ``self._seaborn``)"""
    seaborn_accessor = self._seaborn
    return seaborn_accessor

@property
def seaborn(self):
    """Property to access ``seaborn`` API (returns ``self._seaborn``)"""
    seaborn_accessor = self._seaborn
    return seaborn_accessor

@cache_readonly
def _seaborn(self):
    """Build the ``SeabornMethods`` accessor wrapping this frame."""
    seaborn_accessor = seaborn.SeabornMethods(self)
    return seaborn_accessor

@property
def xgb(self):
"""Property to access ``xgboost.sklearn`` API"""
Expand Down
3 changes: 2 additions & 1 deletion pandas_ml/plotting/estimator.py
Expand Up @@ -11,7 +11,8 @@ def __init__(self, data, estimator):
self.data = data
self.estimator = estimator

self.n_components = len(self.data.data)
assert isinstance(self.data, pd.DataFrame)
self.n_components = self.data.data.shape[1]

# used to reduce dimensionality
from sklearn.decomposition import PCA
Expand Down
1 change: 1 addition & 0 deletions pandas_ml/plotting/test/__init__.py
@@ -0,0 +1 @@
#!/usr/bin/env python
75 changes: 75 additions & 0 deletions pandas_ml/plotting/test/test_estimator.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python

import numpy as np
import pandas as pd

import sklearn.datasets as datasets

import pandas_ml as pdml
import pandas_ml.util.testing as tm

import matplotlib
matplotlib.use('Agg')


class TestPlotting(tm.PlottingTestCase):
    """Checks of ``ModelFrame.plot_estimator`` on sklearn sample datasets."""

    @staticmethod
    def _iris(columns=None):
        # ModelFrame over iris; ``columns`` optionally narrows the data to
        # the given positional columns
        frame = pdml.ModelFrame(datasets.load_iris())
        if columns is not None:
            frame.data = frame.data.iloc[:, columns]
        return frame

    @staticmethod
    def _diabetes(labels):
        # ModelFrame over diabetes, keeping only the given data columns
        frame = pdml.ModelFrame(datasets.load_diabetes())
        frame.data = frame.data[labels]
        return frame

    def _check_svc_plot(self, frame, **svc_kwargs):
        # fit an SVC (C=1.0 plus any extra options) and verify the axes grid
        frame.fit(frame.svm.SVC(C=1.0, **svc_kwargs))
        axes = frame.plot_estimator()
        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))

    def test_no_estimator(self):
        # no estimator has been fitted, so plotting must raise
        frame = self._iris()
        with tm.assertRaises(ValueError):
            frame.plot_estimator()

    def test_not_supported_estimator(self):
        # KMeans is expected to be rejected with NotImplementedError
        frame = self._iris()
        frame.fit(frame.cluster.KMeans(n_clusters=3))

        with tm.assertRaises(NotImplementedError):
            frame.plot_estimator()

    def test_regression_plot_2d(self):
        # single feature -> ordinary matplotlib axes
        frame = self._diabetes([0])
        frame.fit(frame.linear_model.LinearRegression())
        result = frame.plot_estimator()
        self.assertIsInstance(result, matplotlib.axes.Axes)

    def test_regression_plot_3d(self):
        # two features -> 3D axes
        frame = self._diabetes([0, 2])
        frame.fit(frame.linear_model.LinearRegression())
        result = frame.plot_estimator()

        from mpl_toolkits.mplot3d import Axes3D
        self.assertIsInstance(result, Axes3D)

    def test_classification_plot_proba(self):
        self._check_svc_plot(self._iris([0, 1]), probability=True)

    def test_classification_plot_decision(self):
        self._check_svc_plot(self._iris([0, 1]))

    def test_classification_plot_proba_highdim(self):
        # all iris features are kept
        self._check_svc_plot(self._iris(), probability=True)

    def test_classification_plot_decision_highdim(self):
        # all iris features are kept
        self._check_svc_plot(self._iris())


if __name__ == '__main__':
    # Direct execution: run this module's tests through nose with verbose
    # output, stopping at the first failure and dropping into pdb.
    import nose
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
3 changes: 3 additions & 0 deletions pandas_ml/seaborn/__init__.py
@@ -0,0 +1,3 @@
#!/usr/bin/env python

from pandas_ml.seaborn.base import SeabornMethods
202 changes: 202 additions & 0 deletions pandas_ml/seaborn/base.py
@@ -0,0 +1,202 @@
#!/usr/bin/env python

import pandas as pd

#!/usr/bin/env python

import numpy as np
import pandas as pd
from pandas.util.decorators import cache_readonly

from pandas_ml.core.accessor import _AccessorMethods, _attach_methods


class SeabornMethods(_AccessorMethods):
    """Accessor to ``seaborn``.

    Each method forwards to the same-named attribute of ``self._module``,
    passing the wrapped frame (``self._df``) as ``data``. Where documented,
    an omitted argument falls back to the frame's target.
    """

    _module_name = 'seaborn'

    def _check_single_target(self, key):
        """
        Raise ``ValueError`` when ``key`` can't default to the target
        because the frame has multiple target columns.
        """
        if self._df.has_multi_targets():
            msg = ("{key} can't be omitted when ModelFrame has multiple "
                   "target columns")
            # the template uses a named field, so key must be passed by name
            raise ValueError(msg.format(key=key))

    def _maybe_target_name(self, value, key):
        """
        Return ``value``, or ``ModelFrame.target_name`` if it is ``None``.

        ``key`` is the name of the argument being resolved and is only used
        in the error message. Raises ``ValueError`` if ``value`` is ``None``
        and the frame has multiple target columns.
        """
        if value is None:
            self._check_single_target(key)
            value = self._df.target_name
        return value

    def _maybe_target_series(self, value, key):
        """
        Return the data to plot for ``value``.

        - ``None``: ``ModelFrame.target``
        - not list-like: looked up in the frame as ``self._df[value]``
        - list-like: returned unchanged

        ``key`` is the name of the argument being resolved and is only used
        in the error message. Raises ``ValueError`` if ``value`` is ``None``
        and the frame has multiple target columns.
        """
        if value is None:
            self._check_single_target(key)
            value = self._df.target

        elif not pd.core.common.is_list_like(value):
            value = self._df[value]
        return value

    # Axis grids

    def FacetGrid(self, row=None, col=None, *args, **kwargs):
        """
        Call ``seaborn.FacetGrid`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.FacetGrid(data=self._df, row=row, col=col,
                                      *args, **kwargs)

    def PairGrid(self, *args, **kwargs):
        """
        Call ``seaborn.PairGrid`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.PairGrid(data=self._df, *args, **kwargs)

    def JointGrid(self, x, y, *args, **kwargs):
        """
        Call ``seaborn.JointGrid`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.JointGrid(x, y, data=self._df, *args, **kwargs)

    # Distribution plots

    def distplot(self, a=None, *args, **kwargs):
        """
        Call ``seaborn.distplot`` using automatic mapping.
        - ``a``: ``ModelFrame.target``
        """
        a = self._maybe_target_series(a, key='a')
        return self._module.distplot(a, *args, **kwargs)

    def rugplot(self, a=None, *args, **kwargs):
        """
        Call ``seaborn.rugplot`` using automatic mapping.
        - ``a``: ``ModelFrame.target``
        """
        a = self._maybe_target_series(a, key='a')
        return self._module.rugplot(a, *args, **kwargs)

    # Regression plots

    def interactplot(self, x1, x2, y=None, *args, **kwargs):
        """
        Call ``seaborn.interactplot`` using automatic mapping.
        - ``data``: ``ModelFrame``
        - ``y``: ``ModelFrame.target_name``
        """
        y = self._maybe_target_name(y, key='y')
        return self._module.interactplot(x1, x2, y, data=self._df,
                                         *args, **kwargs)

    def coefplot(self, formula, *args, **kwargs):
        """
        Call ``seaborn.coefplot`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.coefplot(formula, data=self._df, *args, **kwargs)

    # Categorical plots

    def countplot(self, x=None, y=None, *args, **kwargs):
        """
        Call ``seaborn.countplot`` using automatic mapping.
        - ``data``: ``ModelFrame``
        - ``x``: ``ModelFrame.target_name`` (only when both ``x`` and ``y``
          are omitted)
        """
        if x is None and y is None:
            x = self._maybe_target_name(x, key='x')
        return self._module.countplot(x, y, data=self._df, *args, **kwargs)

    # Matrix plots

    def heatmap(self, *args, **kwargs):
        """
        Call ``seaborn.heatmap`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.heatmap(data=self._df, *args, **kwargs)

    def clustermap(self, *args, **kwargs):
        """
        Call ``seaborn.clustermap`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.clustermap(data=self._df, *args, **kwargs)

    # Timeseries plots

    def tsplot(self, *args, **kwargs):
        """
        Call ``seaborn.tsplot`` using automatic mapping.
        - ``data``: ``ModelFrame``
        """
        return self._module.tsplot(data=self._df, *args, **kwargs)



def _wrap_xy_plot(func, func_name):
"""
Wrapper for plotting with x, y, data
"""
def f(self, x, y=None, *args, **kwargs):
y = self._maybe_target_name(y, key='y')
return func(x, y, data=self._df, *args, **kwargs)

f.__doc__ = (
"""
Call ``%s`` using automatic mapping.
- ``data``: ``ModelFrame``
- ``y``: ``ModelFrame.target_name``
""" % func_name)
return f


def _wrap_categorical_plot(func, func_name):
"""
Wrapper for categorical, x and y may be optional
"""
def f(self, y=None, x=None, *args, **kwargs):

if x is not None and y is None:
y = self._maybe_target_name(y, key='y')

elif x is None and y is not None:
x = self._maybe_target_name(x, key='x')
print(y)
return func(x, y, data=self._df, *args, **kwargs)

f.__doc__ = (
"""
Call ``%s`` using automatic mapping. If you omit x
- ``data``: ``ModelFrame``
- ``x``: ``ModelFrame.target_name``
""" % func_name)
return f

def _wrap_data_plot(func, func_name):
"""
Wrapper for plotting with data
"""
def f(self, *args, **kwargs):
return func(data=self._df, *args, **kwargs)

f.__doc__ = (
"""
Call ``%s`` using automatic mapping.
- ``data``: ``ModelFrame``
""" % func_name)
return f

# Names of the seaborn functions handled by each wrapper factory.
_xy_plots = ['jointplot', 'lmplot', 'regplot', 'residplot']
_categorical_plots = ['factorplot', 'boxplot', 'violinplot', 'stripplot',
                      'pointplot', 'barplot']
_data_plots = ['pairplot', 'kdeplot']

# NOTE(review): ``_attach_methods`` is defined in pandas_ml.core.accessor;
# presumably it wraps each named function with the given factory and sets
# it on the class -- confirm against its implementation.
_attach_methods(SeabornMethods, _wrap_xy_plot, _xy_plots)
_attach_methods(SeabornMethods, _wrap_categorical_plot, _categorical_plots)
_attach_methods(SeabornMethods, _wrap_data_plot, _data_plots)
1 change: 1 addition & 0 deletions pandas_ml/seaborn/test/__init__.py
@@ -0,0 +1 @@
#!/usr/bin/env python

0 comments on commit bbbb9b2

Please sign in to comment.