Skip to content

Commit

Permalink
Merge 033d087 into cff2153
Browse files Browse the repository at this point in the history
  • Loading branch information
daffidwilde committed Apr 5, 2019
2 parents cff2153 + 033d087 commit 2996665
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 32 deletions.
37 changes: 23 additions & 14 deletions kmodes/kmodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,10 @@ def k_modes_single(X, n_clusters, n_points, n_attrs, max_iter, dissim, init, ini
itr = 0
labels = None
converged = False
cost = np.Inf

_, cost = _labels_cost(X, centroids, dissim, membship)

epoch_costs = [cost]
while itr <= max_iter and not converged:
itr += 1
centroids, moves = _k_modes_iter(
Expand All @@ -231,12 +234,13 @@ def k_modes_single(X, n_clusters, n_points, n_attrs, max_iter, dissim, init, ini
# All points seen in this iteration
labels, ncost = _labels_cost(X, centroids, dissim, membship)
converged = (moves == 0) or (ncost >= cost)
epoch_costs.append(ncost)
cost = ncost
if verbose:
print("Run {}, iteration: {}/{}, moves: {}, cost: {}"
.format(init_no + 1, itr, max_iter, moves, cost))

return centroids, labels, cost, itr
return centroids, labels, cost, itr, epoch_costs


def k_modes(X, n_clusters, max_iter, dissim, init, n_init, verbose, random_state, n_jobs):
Expand Down Expand Up @@ -280,14 +284,14 @@ def k_modes(X, n_clusters, max_iter, dissim, init, n_init, verbose, random_state
delayed(k_modes_single)(X, n_clusters, n_points, n_attrs, max_iter,
dissim, init, init_no, verbose, seed)
for init_no, seed in enumerate(seeds))
all_centroids, all_labels, all_costs, all_n_iters = zip(*results)
all_centroids, all_labels, all_costs, all_n_iters, all_epoch_costs = zip(*results)

best = np.argmin(all_costs)
if n_init > 1 and verbose:
print("Best run was number {}".format(best + 1))

return all_centroids[best], enc_map, all_labels[best], \
all_costs[best], all_n_iters[best]
all_costs[best], all_n_iters[best], all_epoch_costs[best]


class KModes(BaseEstimator, ClusterMixin):
Expand Down Expand Up @@ -354,6 +358,9 @@ class KModes(BaseEstimator, ClusterMixin):
n_iter_ : int
The number of iterations the algorithm ran for.
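epoch_costs_ : list
The cost of the best run at each epoch from start to completion: the initial cost computed before the first iteration, followed by the cost recorded after each iteration.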
Notes
-----
See:
Expand Down Expand Up @@ -390,16 +397,18 @@ def fit(self, X, y=None, **kwargs):
"""

random_state = check_random_state(self.random_state)
self._enc_cluster_centroids, self._enc_map, self.labels_,\
self.cost_, self.n_iter_ = k_modes(X,
self.n_clusters,
self.max_iter,
self.cat_dissim,
self.init,
self.n_init,
self.verbose,
random_state,
self.n_jobs)
self._enc_cluster_centroids, self._enc_map, self.labels_, self.cost_, \
self.n_iter_, self.epoch_costs_ = k_modes(
X,
self.n_clusters,
self.max_iter,
self.cat_dissim,
self.init,
self.n_init,
self.verbose,
random_state,
self.n_jobs,
)
return self

def fit_predict(self, X, y=None, **kwargs):
Expand Down
47 changes: 29 additions & 18 deletions kmodes/kprototypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,11 @@ def k_prototypes_single(Xnum, Xcat, nnumattrs, ncatattrs, n_clusters, n_points,
itr = 0
labels = None
converged = False
cost = np.Inf

_, cost = _labels_cost(Xnum, Xcat, centroids,
num_dissim, cat_dissim, gamma, membship)

epoch_costs = [cost]
while itr <= max_iter and not converged:
itr += 1
centroids, moves = _k_prototypes_iter(Xnum, Xcat, centroids,
Expand All @@ -243,12 +247,13 @@ def k_prototypes_single(Xnum, Xcat, nnumattrs, ncatattrs, n_clusters, n_points,
labels, ncost = _labels_cost(Xnum, Xcat, centroids,
num_dissim, cat_dissim, gamma, membship)
converged = (moves == 0) or (ncost >= cost)
epoch_costs.append(ncost)
cost = ncost
if verbose:
print("Run: {}, iteration: {}/{}, moves: {}, ncost: {}"
.format(init_no + 1, itr, max_iter, moves, ncost))

return centroids, labels, cost, itr
return centroids, labels, cost, itr, epoch_costs


def k_prototypes(X, categorical, n_clusters, max_iter, num_dissim, cat_dissim,
Expand Down Expand Up @@ -319,15 +324,15 @@ def k_prototypes(X, categorical, n_clusters, max_iter, num_dissim, cat_dissim,
num_dissim, cat_dissim, gamma,
init, init_no, verbose, seed)
for init_no, seed in enumerate(seeds))
all_centroids, all_labels, all_costs, all_n_iters = zip(*results)
all_centroids, all_labels, all_costs, all_n_iters, all_epoch_costs = zip(*results)

best = np.argmin(all_costs)
if n_init > 1 and verbose:
print("Best run was number {}".format(best + 1))

# Note: return gamma in case it was automatically determined.
return all_centroids[best], enc_map, all_labels[best], \
all_costs[best], all_n_iters[best], gamma
return all_centroids[best], enc_map, all_labels[best], all_costs[best], \
all_n_iters[best], all_epoch_costs[best], gamma


class KPrototypes(kmodes.KModes):
Expand Down Expand Up @@ -403,6 +408,9 @@ class KPrototypes(kmodes.KModes):
n_iter_ : int
The number of iterations the algorithm ran for.
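epoch_costs_ : list
The cost of the best run at each epoch from start to completion: the initial cost computed before the first iteration, followed by the cost recorded after each iteration.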
gamma : float
The (potentially calculated) weighting factor.
Expand Down Expand Up @@ -448,19 +456,22 @@ def fit(self, X, y=None, categorical=None):
random_state = check_random_state(self.random_state)
# If self.gamma is None, gamma will be automatically determined from
# the data. The function below returns its value.
self._enc_cluster_centroids, self._enc_map, self.labels_, self.cost_,\
self.n_iter_, self.gamma = k_prototypes(X,
categorical,
self.n_clusters,
self.max_iter,
self.num_dissim,
self.cat_dissim,
self.gamma,
self.init,
self.n_init,
self.verbose,
random_state,
self.n_jobs)
self._enc_cluster_centroids, self._enc_map, self.labels_, self.cost_, \
self.n_iter_, self.epoch_costs_, self.gamma = k_prototypes(
X,
categorical,
self.n_clusters,
self.max_iter,
self.num_dissim,
self.cat_dissim,
self.gamma,
self.init,
self.n_init,
self.verbose,
random_state,
self.n_jobs
)

return self

def predict(self, X, categorical=None):
Expand Down

0 comments on commit 2996665

Please sign in to comment.