Skip to content

Commit

Permalink
Merge pull request #118 from nicodv/cleanup
Browse files Browse the repository at this point in the history
Cleanup
  • Loading branch information
nicodv committed Apr 15, 2019
2 parents 240c4ab + 1d9fc1f commit 1e16b4f
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 20 deletions.
4 changes: 1 addition & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ matrix:
- python: 3.7
dist: xenial
sudo: true
cache:
apt
pip
cache: pip
addons:
apt:
packages:
Expand Down
13 changes: 4 additions & 9 deletions kmodes/kmodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from sklearn.utils import check_random_state
from sklearn.utils.validation import check_array

from .util import get_max_value_key, encode_features, get_unique_rows, decode_centroids
from .util import get_max_value_key, encode_features, get_unique_rows, \
decode_centroids, pandas_to_numpy
from .util.dissim import matching_dissim, ng_dissim


Expand Down Expand Up @@ -245,10 +246,6 @@ def k_modes(X, n_clusters, max_iter, dissim, init, n_init, verbose, random_state
if sparse.issparse(X):
raise TypeError("k-modes does not support sparse data.")

# Convert pandas objects to numpy arrays.
if 'pandas' in str(X.__class__):
X = X.values

X = check_array(X, dtype=None)

# Convert the categorical values in X to integers for speed.
Expand Down Expand Up @@ -388,6 +385,7 @@ def fit(self, X, y=None, **kwargs):
----------
X : array-like, shape=[n_samples, n_features]
"""
X = pandas_to_numpy(X)

random_state = check_random_state(self.random_state)
self._enc_cluster_centroids, self._enc_map, self.labels_,\
Expand Down Expand Up @@ -424,17 +422,14 @@ def predict(self, X, **kwargs):
Index of the cluster each sample belongs to.
"""

# Convert pandas objects to numpy arrays.
if 'pandas' in str(X.__class__):
X = X.values

assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."

if self.verbose and self.cat_dissim == ng_dissim:
print("Ng's dissimilarity measure was used to train this model, "
"but now that it is predicting the model will fall back to "
"using simple matching dissimilarity.")

X = pandas_to_numpy(X)
X = check_array(X, dtype=None)
X, _ = encode_features(X, enc_map=self._enc_map)
return _labels_cost(X, self._enc_cluster_centroids, self.cat_dissim)[0]
Expand Down
14 changes: 6 additions & 8 deletions kmodes/kprototypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from sklearn.utils.validation import check_array

from . import kmodes
from .util import get_max_value_key, encode_features, get_unique_rows, decode_centroids
from .util import get_max_value_key, encode_features, get_unique_rows, \
decode_centroids, pandas_to_numpy
from .util.dissim import matching_dissim, euclidean_dissim

# Number of tries we give the initialization methods to find non-empty
Expand Down Expand Up @@ -258,10 +259,6 @@ def k_prototypes(X, categorical, n_clusters, max_iter, num_dissim, cat_dissim,
if sparse.issparse(X):
raise TypeError("k-prototypes does not support sparse data.")

# Convert pandas objects to numpy arrays.
if 'pandas' in str(X.__class__):
X = X.values

if categorical is None or not categorical:
raise NotImplementedError(
"No categorical data selected, effectively doing k-means. "
Expand Down Expand Up @@ -445,6 +442,8 @@ def fit(self, X, y=None, categorical=None):
column in your data, or a list or tuple of several of them, \
but it is a {}.".format(type(categorical))

X = pandas_to_numpy(X)

random_state = check_random_state(self.random_state)
# If self.gamma is None, gamma will be automatically determined from
# the data. The function below returns its value.
Expand Down Expand Up @@ -478,15 +477,14 @@ def predict(self, X, categorical=None):
Index of the cluster each sample belongs to.
"""
assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."
# Convert pandas objects to numpy arrays.
if 'pandas' in str(X.__class__):
X = X.values

if categorical is not None:
assert isinstance(categorical, (int, list, tuple)), "The 'categorical' \
argument needs to be an integer with the index of the categorical \
column in your data, or a list or tuple of several of them, \
but it is a {}.".format(type(categorical))

X = pandas_to_numpy(X)
Xnum, Xcat = _split_num_cat(X, categorical)
Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)
Expand Down
4 changes: 4 additions & 0 deletions kmodes/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
import numpy as np


def pandas_to_numpy(x):
    """Return the underlying NumPy data of a pandas object, else ``x`` itself.

    An object is treated as a pandas object when the string form of any class
    in its method resolution order contains ``'pandas'``. Checking the whole
    MRO (rather than only the concrete class) means that subclasses of
    ``DataFrame``/``Series`` defined outside the pandas package are converted
    as well. pandas is deliberately not imported here, so it stays an
    optional dependency.

    Parameters
    ----------
    x : object
        Input data, e.g. a pandas DataFrame/Series, a NumPy array or a list.

    Returns
    -------
    object
        ``x.values`` when ``x`` is recognised as a pandas object exposing a
        ``values`` attribute; otherwise ``x`` unchanged.
    """
    is_pandas = any('pandas' in str(cls) for cls in type(x).__mro__)
    if not is_pandas:
        return x
    # Fall back to the object itself when it has no ``values`` attribute, so
    # that downstream validation reports the problem instead of an
    # AttributeError being raised here. ``getattr`` with a default evaluates
    # the ``values`` property only once.
    return getattr(x, 'values', x)


def get_max_value_key(dic):
"""Gets the key for the maximum value in a dict."""
v = np.array(list(dic.values()))
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,6 @@
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Scientific/Engineering'],
)

0 comments on commit 1e16b4f

Please sign in to comment.