-
Notifications
You must be signed in to change notification settings - Fork 4
/
pddp.m
77 lines (67 loc) · 2.48 KB
/
pddp.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
function idx = pddp(data, maxClusts, minPoints, debug)
% PDDP Perform P.D.D.P. (principal direction divisive clustering) on input
% data.
%
% idx = PDDP(data, maxClusts, minPoints, debug)
%
% Parameters:
% data - m x n, with m samples and n dimensions
% maxClusts - maximum number of clusters to form (optional, default is inf)
% minPoints - minimum number of points in each cluster (optional, default
% is ceil((size(data, 2)+1)/2) )
% debug - Show debug info? (optional, default is false)
% Output:
% idx - final clustering result
%
% N. Fachada
% Instituto Superior Técnico, Lisboa, Portugal
% Check if maximum number of clusters is given as argument
if nargin < 2, maxClusts = inf; end;
% Check if minimum number of points in each cluster is given
if nargin < 3, minPoints = ceil((size(data, 2) + 1)/2); end;
% Check if debug info is given
if nargin < 4, debug = 0; end;
% Get number of samples and number of dimensions
numSamples = size(data, 1);
% Pre-allocate idx
idx = ones(numSamples, 1);
% Start algorithm
while true
% Get largest cluster
largest = mode(idx);
% Get indexes of points in largest cluster
iIndexes = find(idx == largest);
% How many points does the largest cluster contain?
numObs = size(iIndexes, 1);
% Print some info
debugf(sprintf('Largest cluster has %d observations, data has %d dimensions, minPoints is %d.\n', numObs, size(data, 2), minPoints), debug);
% See if its big enough to make a volume
if numObs > minPoints * 2
% Perform
[~, SCORE] = princomp(data(iIndexes, :), 'econ');
newCluster = max(idx) + 1;
newIndexes = iIndexes(SCORE(:, 1) > 0);
numObsNewClust = size(newIndexes, 1);
debugf(sprintf(' - Number of observations to go to new cluster: %d\n', numObsNewClust), debug);
if min(numObsNewClust, numObs - numObsNewClust) < minPoints
debugf(' - One of the clusters is not big enough, terminating algorithm...\n', debug);
break;
end;
idx(newIndexes) = newCluster;
if newCluster == maxClusts
debugf(' - Reached maximum number of clusters, terminating algorithm...\n', debug);
break;
end;
else
% Can't divide no more, end algorithm
debugf(' - Largest cluster is not divisible, terminating algorithm...\n', debug);
break;
end;
end;
end % pddp
% Debug helper function
function debugf(str, debug)
if debug
fprintf(str);
end;
end