-
-
Notifications
You must be signed in to change notification settings - Fork 1k
/
clustering_kmeans.cpp
115 lines (93 loc) · 3.56 KB
/
clustering_kmeans.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/clustering/KMeans.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/distance/MinkowskiMetric.h>
using namespace shogun;
/**
 * Output callback passed to init_shogun() for all three of its handler slots.
 *
 * Writes the message to the stream verbatim: the text is emitted through a
 * fixed "%s" format, so '%' characters inside the message are never
 * interpreted as conversion specifiers.
 *
 * @param target stream to write to; a null stream makes the call a no-op
 * @param str NUL-terminated message; a null message makes the call a no-op
 */
void print_message(FILE* target, const char* str)
{
    /* a null argument to "%s" (or a null stream) is undefined behaviour */
    if (target == nullptr || str == nullptr)
        return;

    fprintf(target, "%s", str);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
int32_t num_clusters=4;
int32_t num_features=11;
int32_t dim_features=3;
int32_t num_vectors_per_cluster=5;
float64_t cluster_std_dev=2.0;
auto prng = get_prng();
/* build random cluster centers */
SGMatrix<float64_t> cluster_centers(dim_features, num_clusters);
SGVector<float64_t>::random_vector(cluster_centers.matrix, dim_features*num_clusters,
-10.0, 10.0);
SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
cluster_centers.num_cols, "cluster centers");
/* create data around clusters */
SGMatrix<float64_t> data(dim_features, num_clusters*num_vectors_per_cluster);
for (index_t i=0; i<num_clusters; ++i)
{
for (index_t j=0; j<dim_features; ++j)
{
for (index_t k=0; k<num_vectors_per_cluster; ++k)
{
index_t idx=i*dim_features*num_vectors_per_cluster;
idx+=j;
idx+=k*dim_features;
float64_t entry=cluster_centers.matrix[i*dim_features+j];
std::normal_distribution<float64_t> dist(
entry, cluster_std_dev);
data.matrix[idx] = dist(prng);
}
}
}
/* create features, SG_REF to avoid deletion */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(data);
SG_REF(features);
/* create labels for cluster centers */
CMulticlassLabels* labels=new CMulticlassLabels(num_features);
for (index_t i=0; i<num_features; ++i)
labels->set_label(i, i%2==0 ? 0 : 1);
/* create distance */
CEuclideanDistance* distance=new CEuclideanDistance(features, features);
/* create distance machine */
CKMeans* clustering=new CKMeans(num_clusters, distance);
clustering->train(features);
/* build clusters */
CMulticlassLabels* result=CLabelsFactory::to_multiclass(clustering->apply());
for (index_t i=0; i<result->get_num_labels(); ++i)
SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i));
/* print cluster centers */
CDenseFeatures<float64_t>* centers=
(CDenseFeatures<float64_t>*)distance->get_lhs();
SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix();
SGMatrix<float64_t>::display_matrix(centers_matrix.matrix, centers_matrix.num_rows,
centers_matrix.num_cols, "learned centers");
SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
cluster_centers.num_cols, "real centers");
/* clean up */
SG_UNREF(result);
SG_UNREF(centers);
SG_UNREF(clustering);
SG_UNREF(labels);
SG_UNREF(features);
exit_shogun();
return 0;
}