-
Notifications
You must be signed in to change notification settings - Fork 0
/
clustering.py
149 lines (128 loc) · 5.52 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import dataProcessing
import algorithms
import visualization
import argparse
# Command-line interface.
#
# Every invocation names the algorithm with -a/--algorithm and then passes
# that algorithm's parameters through a keyword sub-command, for example:
#     python clustering.py -a Kmeans k 20
#
# Each sub-command name is registered exactly once.  Registering the same
# name twice makes argparse raise "conflicting subparser" on Python 3.11+,
# so algorithms that take the same parameters share one sub-parser.
parser = argparse.ArgumentParser()
parser.add_argument('-a', '--algorithm', type=str, help='Choose the algorithm')
algorithm = parser.add_subparsers(help='algorithm choices')

# K-means:
#     python clustering.py -a Kmeans k [the k value]
kmeans_parser = algorithm.add_parser('k', help='number of clusters')
kmeans_parser.add_argument('K', type=int, help='input the number of clusters')

# DBSCAN (the `mindist` sub-command is nested under `eps`):
#     python clustering.py -a DBSCAN eps [the eps value] mindist [the mindist value]
eps_parser = algorithm.add_parser('eps', help='the epsilon value of a cluster')
eps_parser.add_argument('Eps', type=float, help='the input epsilon')
min_dist = eps_parser.add_subparsers(help='the minimum distance')
minDist_parser = min_dist.add_parser(
    'mindist', help='minimum distance for a reachable point')
minDist_parser.add_argument(
    'minDist', type=int, help='the value of minimum distance')

# Hierarchical clustering and spectral clustering both take a cluster count
# through the same `n` sub-command (stored as args.N):
#     python clustering.py -a HC n [the n value]
#     python clustering.py -a spectral n [the n value]
HC_parser = algorithm.add_parser(
    'n',
    help='the number of clusters in Hierarchical or Spectral Clustering')
HC_parser.add_argument('N', type=int, help='the value of n')
# Alias kept so the module-level name `spectral_parser` still exists.
spectral_parser = HC_parser

# The dendrogram variant of hierarchical clustering and the rote
# classification share the `default` sub-command; its integer (stored as
# args.n) tells them apart:
#     python clustering.py -a HC_2 default 0
#     python clustering.py -a rote default 1
HC_2_parser = algorithm.add_parser('default', help='no input arguments')
HC_2_parser.add_argument(
    'n', type=int,
    help='0 for the second HC algorithm, 1 for the rote classification')
# Alias kept so the module-level name `rote_parser` still exists.
rote_parser = HC_2_parser

# Affinity propagation (the `mi` sub-command is nested under `d`):
#     python clustering.py -a affinity d [the damping value] mi [value of max_iter]
damping_parser = algorithm.add_parser('d', help='the damping factor')
damping_parser.add_argument('D', type=float, help='the value of d')
MI = damping_parser.add_subparsers(help='the mi parser')
mi_parser = MI.add_parser('mi', help='the max_iter')
mi_parser.add_argument('MI', type=int, help='the value of mi')
# All sub-parsers are registered at this point: read the command line.
args = parser.parse_args()

# Load the star database and run it through the coordinate transformation.
database = dataProcessing.transformCoordinate(dataProcessing.readJson())

# Keep only the stars selected by chooseStarWithName.
starsWithName = dataProcessing.chooseStarWithName(database)

# Filter the named stars by brightness using 4.6 as the threshold; these are
# the stars handed to the clustering algorithms.
# NOTE(review): the comparison direction lives in dataProcessing.selectBrightness.
starsNeedClustering = dataProcessing.selectBrightness(starsWithName, 4.6)

# Collect the constellation names among the selected stars.
constellationNames = dataProcessing.getConstellationNames(starsNeedClustering)
# Dispatch on the algorithm named with -a/--algorithm.  Each branch builds the
# matching object from `algorithms`, runs it, and hands the result to
# `visualization`.  Any other value ends in the final `else` and raises.

# K-means++ with the cluster count given by the `k` sub-command.
if args.algorithm == 'Kmeans':
    K = args.K
    standardKMeans = algorithms.KMeansPlusPlus(starsNeedClustering, K)
    standardKMeans.runKmeansPlusPlus()
    visualization.visualize(standardKMeans.assignments, 'Kmeans')

# DBSCAN with the values given by the `eps` / `mindist` sub-commands.
elif args.algorithm == 'DBSCAN':
    Eps = args.Eps
    minDist = args.minDist
    standardDBS = algorithms.densityBasedClustering(
        starsNeedClustering, Eps, minDist)
    standardDBS.runDBA()
    # The noise stars are fetched but not used further in this script.
    noise = standardDBS.getNoise()
    visualization.visualize(standardDBS.assignments, 'DBSCAN')

# Hierarchical clustering with the cluster count from the `n` sub-command.
elif args.algorithm == 'HC':
    n_cluster = args.N
    standardHC = algorithms.aggolomerativeClustering(
        starsNeedClustering, n_cluster)
    standardHC.runHierachicalClustering()
    visualization.visualize(standardHC.assignments, 'Hierarchical Clustering')

# Second hierarchical clustering variant, which draws a dendrogram.
# args.n only exists when the `default` sub-command was supplied, so it is
# read with getattr: a bare `-a HC_2` now runs instead of failing with
# AttributeError, and `default 0` keeps working as before.
elif args.algorithm == 'HC_2' and getattr(args, 'n', 0) == 0:
    HC_version_2 = algorithms.hierarchicalClustering(starsNeedClustering)
    HC_version_2.runHC_Version_2()
    visualization.drawDendrogram(HC_version_2.linkMatrix)

# Spectral clustering with the cluster count from the `n` sub-command.
elif args.algorithm == 'spectral':
    n_cluster = args.N
    standardSpectralClustering = algorithms.spectralClustering(
        starsNeedClustering, n_cluster)
    standardSpectralClustering.runSpectralClustering()
    visualization.visualize(
        standardSpectralClustering.assignments, 'Spectral Clustering')

# Affinity propagation with the values from the `d` / `mi` sub-commands.
elif args.algorithm == 'affinity':
    damping = args.D
    max_iter = args.MI
    standardAP = algorithms.affinityPropagation(
        starsNeedClustering, damping, max_iter)
    standardAP.runAffinityPropagation()
    visualization.visualize(standardAP.assignments, 'Affinity Propagation')

# Rote classification over the constellation names.  As for HC_2, args.n is
# read with getattr so a bare `-a rote` works; `default 1` is unchanged.
elif args.algorithm == 'rote' and getattr(args, 'n', 1) == 1:
    roteClassification = algorithms.roteClassification(
        starsNeedClustering, constellationNames)
    roteClassification.runRoteClassification()
    visualization.visualize(roteClassification.assignments, 'Rote Algorithm')

# Unknown algorithm name, or a `default` value that matches neither HC_2 nor
# rote.
else:
    raise Exception('No Such Bult-in Algorithm')