In [2]:
#!/usr/bin/env python

import numpy as np
from kmodes import kprototypes

# stocks with their market caps, sectors and countries
syms = np.genfromtxt('stocks.csv', dtype=str, delimiter=',')[:, 0]
X = np.genfromtxt('stocks.csv', dtype=object, delimiter=',')[:, 1:]
X[:, 0] = X[:, 0].astype(float)

kproto = kprototypes.KPrototypes(n_clusters=4, init='Cao', verbose=2)
clusters = kproto.fit_predict(X, categorical=[1, 2])

# Print cluster centroids and categorical data mapping of the trained model.
print(kproto.cluster_centroids_)
print(kproto.enc_map_)
# Print training statistics
print(kproto.cost_)
print(kproto.n_iter_)

for s, c in zip(syms, clusters):
    print("Symbol: {}, cluster:{}".format(s, c))
Initialization method and algorithm are deterministic. Setting n_init to 1.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-2-1304e1977b13> in <module>()
     10 
     11 kproto = kprototypes.KPrototypes(n_clusters=4, init='Cao', verbose=2)
---> 12 clusters = kproto.fit_predict(X, categorical=[1, 2])
     13 
     14 # Print cluster centroids and categorical data mapping of the trained model.

/usr/local/lib/python2.7/site-packages/kmodes-0.6-py2.7.egg/kmodes/kmodes.pyc in fit_predict(self, X, y, **kwargs)
    353         predict(X).
    354         """
--> 355         return self.fit(X, **kwargs).labels_
    356 
    357     def predict(self, X, **kwargs):

/usr/local/lib/python2.7/site-packages/kmodes-0.6-py2.7.egg/kmodes/kprototypes.pyc in fit(self, X, y, categorical)
    385                                                      self.n_init,
    386                                                      self.max_iter,
--> 387                                                      self.verbose)
    388         return self
    389 

/usr/local/lib/python2.7/site-packages/kmodes-0.6-py2.7.egg/kmodes/kprototypes.pyc in k_prototypes(X, categorical, n_clusters, gamma, init, n_init, max_iter, verbose)
    161     # Are there more n_clusters than unique rows? Then set the unique
    162     # rows as initial values and skip iteration.
--> 163     unique = get_unique_rows(X)
    164     n_unique = unique.shape[0]
    165     if n_unique <= n_clusters:

/usr/local/lib/python2.7/site-packages/kmodes-0.6-py2.7.egg/kmodes/util/__init__.pyc in get_unique_rows(a)
     53     """Gets the unique rows in a numpy array."""
     54     b = np.ascontiguousarray(a).view(
---> 55         np.dtype((np.void, a.dtype.itemsize * a.shape[1])))
     56     _, idx = np.unique(b, return_index=True)
     57     return a[idx]

/usr/local/lib/python2.7/site-packages/numpy/core/_internal.pyc in _view_is_safe(oldtype, newtype)
    365 
    366     if newtype.hasobject or oldtype.hasobject:
--> 367         raise TypeError("Cannot change data-type for object array.")
    368     return
    369 

TypeError: Cannot change data-type for object array.
In [ ]: