Merge pull request #118 from nicodv/cleanup

Cleanup
nicodv · Apr 15, 2019 · 1e16b4f
2 parents 240c4ab + 1d9fc1f
commit 1e16b4f
Show file tree

Hide file tree

Showing 5 changed files with 16 additions and 20 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -11,9 +11,7 @@ matrix:
     - python: 3.7
       dist: xenial
       sudo: true
-cache:
-  apt
-  pip
+cache: pip
 addons:
   apt:
     packages:

diff --git a/kmodes/kmodes.py b/kmodes/kmodes.py
@@ -13,7 +13,8 @@
 from sklearn.utils import check_random_state
 from sklearn.utils.validation import check_array
 
-from .util import get_max_value_key, encode_features, get_unique_rows, decode_centroids
+from .util import get_max_value_key, encode_features, get_unique_rows, \
+    decode_centroids, pandas_to_numpy
 from .util.dissim import matching_dissim, ng_dissim
 
 
@@ -245,10 +246,6 @@ def k_modes(X, n_clusters, max_iter, dissim, init, n_init, verbose, random_state
     if sparse.issparse(X):
         raise TypeError("k-modes does not support sparse data.")
 
-    # Convert pandas objects to numpy arrays.
-    if 'pandas' in str(X.__class__):
-        X = X.values
-
     X = check_array(X, dtype=None)
 
     # Convert the categorical values in X to integers for speed.
@@ -388,6 +385,7 @@ def fit(self, X, y=None, **kwargs):
         ----------
         X : array-like, shape=[n_samples, n_features]
         """
+        X = pandas_to_numpy(X)
 
         random_state = check_random_state(self.random_state)
         self._enc_cluster_centroids, self._enc_map, self.labels_,\
@@ -424,17 +422,14 @@ def predict(self, X, **kwargs):
             Index of the cluster each sample belongs to.
         """
 
-        # Convert pandas objects to numpy arrays.
-        if 'pandas' in str(X.__class__):
-            X = X.values
-
         assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."
 
         if self.verbose and self.cat_dissim == ng_dissim:
             print("Ng's dissimilarity measure was used to train this model, "
                   "but now that it is predicting the model will fall back to "
                   "using simple matching dissimilarity.")
 
+        X = pandas_to_numpy(X)
         X = check_array(X, dtype=None)
         X, _ = encode_features(X, enc_map=self._enc_map)
         return _labels_cost(X, self._enc_cluster_centroids, self.cat_dissim)[0]

diff --git a/kmodes/kprototypes.py b/kmodes/kprototypes.py
@@ -13,7 +13,8 @@
 from sklearn.utils.validation import check_array
 
 from . import kmodes
-from .util import get_max_value_key, encode_features, get_unique_rows, decode_centroids
+from .util import get_max_value_key, encode_features, get_unique_rows, \
+    decode_centroids, pandas_to_numpy
 from .util.dissim import matching_dissim, euclidean_dissim
 
 # Number of tries we give the initialization methods to find non-empty
@@ -258,10 +259,6 @@ def k_prototypes(X, categorical, n_clusters, max_iter, num_dissim, cat_dissim,
     if sparse.issparse(X):
         raise TypeError("k-prototypes does not support sparse data.")
 
-    # Convert pandas objects to numpy arrays.
-    if 'pandas' in str(X.__class__):
-        X = X.values
-
     if categorical is None or not categorical:
         raise NotImplementedError(
             "No categorical data selected, effectively doing k-means. "
@@ -445,6 +442,8 @@ def fit(self, X, y=None, categorical=None):
                 column in your data, or a list or tuple of several of them, \
                 but it is a {}.".format(type(categorical))
 
+        X = pandas_to_numpy(X)
+
         random_state = check_random_state(self.random_state)
         # If self.gamma is None, gamma will be automatically determined from
         # the data. The function below returns its value.
@@ -478,15 +477,14 @@ def predict(self, X, categorical=None):
             Index of the cluster each sample belongs to.
         """
         assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."
-        # Convert pandas objects to numpy arrays.
-        if 'pandas' in str(X.__class__):
-            X = X.values
+
         if categorical is not None:
             assert isinstance(categorical, (int, list, tuple)), "The 'categorical' \
                 argument needs to be an integer with the index of the categorical \
                 column in your data, or a list or tuple of several of them, \
                 but it is a {}.".format(type(categorical))
 
+        X = pandas_to_numpy(X)
         Xnum, Xcat = _split_num_cat(X, categorical)
         Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
         Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)

diff --git a/kmodes/util/__init__.py b/kmodes/util/__init__.py
@@ -5,6 +5,10 @@
 import numpy as np
 
 
+def pandas_to_numpy(x):
+    return x.values if 'pandas' in str(x.__class__) else x
+
+
 def get_max_value_key(dic):
     """Gets the key for the maximum value in a dict."""
     v = np.array(list(dic.values()))

diff --git a/setup.py b/setup.py
@@ -38,5 +38,6 @@
                  'Programming Language :: Python :: 3.4',
                  'Programming Language :: Python :: 3.5',
                  'Programming Language :: Python :: 3.6',
+                 'Programming Language :: Python :: 3.7',
                  'Topic :: Scientific/Engineering'],
 )