In [None]:
class KMeans(object):
    """K-means clustering with Euclidean distance.

    ``fit`` alternates between assigning every sample to its nearest
    centroid and moving each centroid to the mean of its assigned samples,
    until the assignment stops changing or ``max_iter`` rounds have run.

    Attributes:
        n_clusters: number of clusters (k).
        max_iter: maximum number of assignment/update rounds in ``fit``.
        initCent: the initialisation argument as passed to ``__init__``.
        centroids: (n_clusters, n_features) float array of cluster centres,
            or None until they are known.
        clusterAssment: (n_samples, 2) float array set by ``fit``; column 0
            is the cluster label of each sample, column 1 the squared
            distance from the sample to its assigned centroid.
        labels: ``clusterAssment[:, 0]`` (set by ``fit``).
        sse: sum of ``clusterAssment[:, 1]`` (set by ``fit``).
    """

    # Kept from the original class body so that ``KMeans.np`` stays available;
    # the methods below import numpy themselves.
    import numpy as np

    def __init__(self, n_clusters=10, initCent='random', max_iter=300):
        """Configure the clusterer.

        Args:
            n_clusters: number of clusters (k). Ignored when ``initCent`` is
                an array, in which case the number of rows is used.
            initCent: centroid initialisation; either the string 'random' or
                an array-like (ndarray, list, tuple) of initial centroids,
                one row per cluster.
            max_iter: maximum number of iterations performed by ``fit``.
        """
        import numpy as np
        if hasattr(initCent, '__array__') or isinstance(initCent, (list, tuple)):
            # np.array copies, so fit() never overwrites the caller's data.
            # The builtin float replaces the removed np.float alias.
            self.centroids = np.array(initCent, dtype=float)
            n_clusters = self.centroids.shape[0]
        else:
            self.centroids = None

        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.initCent = initCent

        self.clusterAssment = None
        self.labels = None
        self.sse = None

    def _distEclud(self, vecA, vecB):
        """Return the Euclidean distance between two points."""
        import numpy as np
        # TensorFlow equivalents noted by the original author:
        #   Euclidean: tf.sqrt(tf.reduce_sum(tf.pow((xtr-xte), 2), reduction_indices=1))
        #   Manhattan: tf.reduce_sum(tf.abs(xtr-xte), reduction_indices=1)
        return np.linalg.norm(vecA - vecB)

    def _randCent(self, X, k):
        """Draw k random centroids inside the per-feature bounds of X.

        Args:
            X: 2-D array of samples, one row per sample.
            k: number of centroids to generate.

        Returns:
            A (k, n_features) float array whose column j lies between the
            minimum and maximum of ``X[:, j]``.
        """
        import numpy as np
        n = X.shape[1]  # number of features
        minJ = X.min(axis=0)
        rangeJ = (X.max(axis=0) - minJ).astype(float)
        # rand(n, k).T hands the random stream out feature by feature, which
        # is the order the original per-column loop used, so a given seed
        # still produces the same centroids.
        return minJ + rangeJ * np.random.rand(n, k).T

    def _check_array(self, X):
        """Return X as a numpy array, raising TypeError if it cannot be converted."""
        import numpy as np
        if isinstance(X, np.ndarray):
            return X
        try:
            return np.asarray(X)
        except Exception:
            raise TypeError('X need 是一个numpy.ndarray类型')

    def _nearest(self, X):
        """Find the nearest centroid of every row of X.

        Returns:
            A pair ``(labels, dists)``: the index of the closest centroid for
            each sample (ties go to the lowest index) and the Euclidean
            distance to it.
        """
        import numpy as np
        m = X.shape[0]
        dists = np.empty((m, self.n_clusters))
        # One vectorised pass per centroid keeps the temporaries at the size
        # of X instead of materialising an (m, k, n_features) array.
        for j in range(self.n_clusters):
            dists[:, j] = np.linalg.norm(X - self.centroids[j], axis=1)
        labels = dists.argmin(axis=1)
        return labels, dists[np.arange(m), labels]

    def fit(self, X):
        """Cluster the data set X.

        After the call ``centroids``, ``clusterAssment``, ``labels`` and
        ``sse`` are populated. With ``initCent='random'`` fresh centroids are
        drawn on every call; with explicit centroids the current value of
        ``self.centroids`` is the starting point.

        Args:
            X: array-like of shape (n_samples, n_features).

        Raises:
            TypeError: if X cannot be converted to a numpy array.
            ValueError: if no initial centroids are available because
                ``initCent`` is neither 'random' nor an array-like.
        """
        import numpy as np
        X = self._check_array(X)
        m = X.shape[0]  # number of samples

        # Compare only when initCent is a string: comparing an ndarray with
        # 'random' is not a plain boolean test.
        if isinstance(self.initCent, str) and self.initCent == 'random':
            self.centroids = self._randCent(X, self.n_clusters)
        if self.centroids is None:
            raise ValueError(
                "initCent must be 'random' or an array of initial centroids")

        # -1 is never a valid label, so the first round always counts as a change.
        labels = np.full(m, -1)
        sqDist = np.full(m, np.inf)
        for _ in range(self.max_iter):
            newLabels, dist = self._nearest(X)
            clusterChanged = not np.array_equal(newLabels, labels)
            labels = newLabels
            # Refresh the distances even for samples that kept their label:
            # the centroids moved since the previous round.
            sqDist = dist ** 2
            if not clusterChanged:
                break
            for j in range(self.n_clusters):
                ptsInClust = X[labels == j]
                # An empty cluster keeps its previous centroid; the mean of
                # zero points would be NaN.
                if ptsInClust.shape[0] > 0:
                    self.centroids[j, :] = np.mean(ptsInClust, axis=0)

        self.clusterAssment = np.column_stack((labels, sqDist))
        self.labels = self.clusterAssment[:, 0]
        self.sse = float(np.sum(self.clusterAssment[:, 1]))

    def predict(self, X):
        """Return the index of the nearest centroid for every row of X.

        Args:
            X: array-like of shape (n_samples, n_features).

        Returns:
            A float array of shape (n_samples,) holding cluster indices.

        Raises:
            TypeError: if X cannot be converted to a numpy array.
        """
        X = self._check_array(X)
        labels, _ = self._nearest(X)
        # The original implementation returned a float array; keep that dtype.
        return labels.astype(float)

In [None]:
import time
# Read the data sets stored under 'data/fashion' (presumably Fashion-MNIST
# with one-hot labels and no validation split -- confirm against the
# read_data_sets documentation). `input_data` is imported from
# tensorflow.examples.tutorials.mnist in a later cell of this notebook, so
# that cell has to be executed before this one.
mnist = input_data.read_data_sets(
    'data/fashion',
    one_hot=True,
    validation_size=0,
)

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data