# KMeansClustering.py
# forked from serengil/tensorflow-101
import tensorflow as tf
import numpy as np
import pylab as pl
from tensorflow.contrib.factorization.python.ops import clustering_ops
from mpl_toolkits.mplot3d import Axes3D
#-----------------------------------------
# configuration
classes = 3  # number of clusters to look for
display3D = True  # True: draw the clustered points on 3D axes; False: use 2D scatter plots
#-----------------------------------------
# dataset: one row per sample, columns are (monthly expenses, net assets)
atributes = [
    [1100, 1200],
    [2200, 2500],
    [3300, 3600],
    [2400, 2700],
    [14100, 3200],
    [4120, 15200],
    [3125, 3600],
    [2400, 13700],
    [3100, 3200],
    [4100, 4200],
    [13100, 13200],
    [4110, 14200],
    [5100, 15200],
]
# sample count and number of attributes per sample
row, col = len(atributes), len(atributes[0])
print("[", row,"x",col,"] sized input")
# 2D mode only: preview the raw, not-yet-clustered samples as black points
if not display3D:
    for sample in atributes:
        pl.scatter(sample[0], sample[1], c='black')
    pl.show()
#-----------------------------------------
# NOTE(review): tf.contrib is not shipped with TensorFlow 2.x, so this script
# needs a TensorFlow 1.x installation.
kmeans_cls = tf.contrib.learn.KMeansClustering
model = kmeans_cls(
    classes,
    # the other metric mentioned by the original author is COSINE_DISTANCE
    distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
    initial_clusters=kmeans_cls.RANDOM_INIT,
)
#-----------------------------------------
def train_input_fn():
    """Return the training input as a ``(features, labels)`` pair.

    The features are the whole ``atributes`` table wrapped in a float32
    constant tensor; the labels slot is ``None``.
    """
    features = tf.constant(atributes, dtype=tf.float32)
    return features, None
# train on the full dataset (steps=5000), then print the estimator object
model.fit(
    input_fn=train_input_fn,
    steps=5000,
)
print("--------------------")
print("kmeans model: ", model)
def predict_input_fn():
    """Return every row of ``atributes`` as a float32 NumPy array."""
    samples = np.array(atributes, dtype=np.float32)
    return samples
# Ask the fitted model for the cluster of every training sample and plot the
# samples coloured by cluster.
predictions = model.predict(input_fn=predict_input_fn, as_iterable=True)
colors = ['orange', 'red', 'blue']
print("--------------------")
if display3D:
    # 3D mode draws on a dedicated Axes3D; 2D mode uses pylab's current axes
    fig = pl.figure()
    ax = fig.add_subplot(111, projection='3d')
# predictions are consumed in the same order as the rows of `atributes`,
# so pairing them with zip replaces the manual index counter
for sample, prediction in zip(atributes, predictions):
    cluster = prediction['cluster_idx']
    print("[", sample,"] -> cluster_",cluster)
    # wrap around the palette so that raising `classes` above len(colors)
    # reuses colours instead of raising IndexError
    color = colors[cluster % len(colors)]
    if display3D:
        ax.scatter(sample[0], sample[1], c=color)  # 3d graph
    else:
        pl.scatter(sample[0], sample[1], c=color)  # 2d graph
pl.show()
#-----------------------------------------
"""
#to predict the cluster of new instances
testset = [[1.3, 1.2]
, [2.1, 2.3]
]
def newinstances_input_fn():
return np.array(testset, np.float32)
predictions = model.predict(input_fn=newinstances_input_fn, as_iterable=True)
for i in predictions:
print("cluster_",i['cluster_idx'])
"""