function params = bd_vmfmm(vectors, k, clust)
% Performs clustering using a von Mises-Fisher (vMF) mixture model.
% See Sects. 3 and 4 of ref. [1] or Sect. 2 of ref. [2].
% INPUT:
%   vectors: feature vectors (N x 3); each row is assumed to be a unit vector
%   k      : number of clusters
%   clust  : (optional) initial labels (cluster assignments) for the data
% OUTPUT:
%   params: structure holding different kinds of information about the
%   clustering.
%   *note* If only the cluster labels are desired, they can be read
%   directly from 'params.label' and the rest ignored.
% References:
% [1] Hasnat et al., Model-based hierarchical clustering with Bregman
%     divergences and Fishers mixture model: application to depth image
%     analysis. Statistics and Computing, 1-20, 2015.
% [2] Hasnat et al., Hierarchical 3-D von Mises-Fisher Mixture Model,
%     ICML-WDDL, 2013.
%
% Author: Md Abul HASNAT
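%
% Example (a minimal sketch; the variable names are illustrative, and the
% helper functions kmeanspp and getThetaFromEta must be on the MATLAB path):
%
%   dirs = randn(1000, 3);
%   dirs = bsxfun(@rdivide, dirs, sqrt(sum(dirs.^2, 2))); % unit vectors
%   params = bd_vmfmm(dirs, 3);  % fit a 3-component vMF mixture
%   labels = params.label;       % hard cluster assignments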
desNumComp = k;
[D, V] = size(vectors);
numOfDataSample = D;
dim = V;
if nargin < 3
    % No initial labels supplied: seed with k-means++ (external helper)
    clust = kmeanspp(vectors', k);
end
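
% Note: kmeanspp is an external k-means++ seeding routine that is not part
% of this file. As an assumption (not the authors' choice), MATLAB's own
% kmeans from the Statistics and Machine Learning Toolbox, which also uses
% k-means++ seeding by default, could serve as a stand-in:
%
%   clust = kmeans(vectors, k);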

%% Bregman soft clustering
MAX_ITERATIONS = 200;

% Initialization: compute the weight and the expectation parameter of each
% cluster from the initial hard assignments
for j = 1:desNumComp
    indx = find(clust == j);
    alpha(j) = length(indx) / numOfDataSample;  % mixing weight
    % Expectation parameter: mean of the sufficient statistic (the data
    % vectors themselves) over the cluster
    dataClust = vectors(indx, :);
    sufStat(j, :) = sum(dataClust, 1);
    eta(j, :) = sufStat(j, :) / size(dataClust, 1);
    normEta(j) = sqrt(eta(j, :) * eta(j, :)');
    normTheta(j) = getThetaFromEta(normEta(j));
    % Compute R(normTheta) = (coth(normTheta) - 1/normTheta) / normTheta
    R_norm_theta(j) = ((1/tanh(normTheta(j))) - (1/normTheta(j))) / normTheta(j);
    theta_cl(j, :) = eta(j, :) ./ R_norm_theta(j);  % natural parameter
end

% Start the EM algorithm
logLikelihoodNew = 100;
logLikelihoodOld = 0;
logLikelihoodThreshold = 0.001;
iterations = 1;
diffLLH = abs(logLikelihoodOld - logLikelihoodNew);

while (iterations < MAX_ITERATIONS && diffLLH > logLikelihoodThreshold)
    logLikelihoodOld = logLikelihoodNew;

    % Expectation step
    for j = 1:k
        % Bregman divergence of each point from cluster j, written with the
        % dual log-normalizer; the term G(x) is constant across clusters
        % and therefore dropped
        Log_Normalizing_Function(j) = log((4*pi*sinh(normTheta(j))) / normTheta(j));
        Dual_Log_Normalizing_Function(j) = (eta(j, :) * theta_cl(j, :)') - Log_Normalizing_Function(j);
        Grad_Dual_Log_Normalizing_Function(j, :) = theta_cl(j, :);
        sufStat_minus_expectParam(:, :, j) = bsxfun(@minus, vectors, eta(j, :));
        innerProdTerm(:, j) = sufStat_minus_expectParam(:, :, j) * Grad_Dual_Log_Normalizing_Function(j, :)';
        % Compute divergence
        divergence(:, j) = -(Dual_Log_Normalizing_Function(j) + innerProdTerm(:, j));
        expTerm(:, j) = exp(-divergence(:, j));
        probTerm(:, j) = alpha(j) * expTerm(:, j);
    end
    % Normalize to posterior probabilities and take the MAP labels
    probTerm = bsxfun(@rdivide, probTerm, sum(probTerm, 2));
    [~, clust] = max(probTerm, [], 2);
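
    % Note (added sketch, not part of the original pipeline): by the
    % Bregman soft clustering result of Banerjee et al. (2005),
    % alpha(j)*exp(-divergence(:,j)) is, up to a factor constant across
    % clusters, proportional to the vMF class-conditional likelihood
    %   alpha(j) * C3(kappa_j) * exp(kappa_j * mu_j' * x),
    % with C3(kappa) = kappa / (4*pi*sinh(kappa)), so the posteriors above
    % match those of a standard vMF EM expectation step.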

    % Maximization step
    sumProbTerm = sum(probTerm);

    % Update mixing weights
    alpha = sumProbTerm ./ size(probTerm, 1);

    % Update component parameters
    for j = 1:k
        % Expectation parameter: posterior-weighted mean of the data
        eta(j, :) = sum(bsxfun(@times, probTerm(:, j), vectors)) ./ sumProbTerm(j);
        % Convert the expectation parameter (eta) to the source parameters
        % (mu, kappa)
        normEta(j) = sqrt(eta(j, :) * eta(j, :)');
        normTheta(j) = getThetaFromEta(normEta(j));
        % Compute R(normTheta)
        R_norm_theta(j) = ((1/tanh(normTheta(j))) - (1/normTheta(j))) / normTheta(j);
        theta_cl(j, :) = eta(j, :) ./ R_norm_theta(j);
        update_kappa(j) = normTheta(j);
        update_mu(j, :) = theta_cl(j, :) ./ normTheta(j);
    end

    % Compute the new log-likelihood
    logWeight = log(alpha);
    logNormTerm = log(update_kappa) - log(4*pi*sinh(update_kappa));
    logExpTerm = bsxfun(@times, update_kappa, (update_mu * vectors')');
    logClassCondLiklihood = bsxfun(@plus, logWeight + logNormTerm, logExpTerm);
    ClassCondLiklihood = exp(logClassCondLiklihood);
    logMixtureLikelihoodPerComponent = log(sum(ClassCondLiklihood, 2));
    logLikelihoodNew = sum(logMixtureLikelihoodPerComponent);

    diffLLH = abs(logLikelihoodOld - logLikelihoodNew);
    LLH(iterations) = -logLikelihoodNew;  % negative log-likelihood trace
    iterations = iterations + 1;
end

%% Save parameters
params.expectation = eta;           % expectation parameters (eta)
params.natural = theta_cl;          % natural parameters (theta)
params.source.kappa = update_kappa; % concentrations
params.source.mu = update_mu;       % mean directions
params.weight = alpha;              % mixing weights
params.label = clust;               % hard cluster assignments
params.cp = probTerm;               % posterior (conditional) probabilities
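
% ------------------------------------------------------------------------
% getThetaFromEta is an external helper in the original distribution and is
% not part of this file; it inverts the 3-D vMF mean-resultant-length map
% r = coth(theta) - 1/theta. The local stand-in below is an assumption, not
% the authors' implementation: it uses the closed-form approximation of
% Banerjee et al. (2005), kappa ~ r*(d - r^2)/(1 - r^2) with d = 3. Remove
% it if the original helper is on the path, since a local function shadows
% functions of the same name.
function theta = getThetaFromEta(r)
% Approximately invert r = coth(theta) - 1/theta to recover the
% concentration (the norm of the natural parameter) from the norm r of the
% expectation parameter
theta = (r * (3 - r^2)) / (1 - r^2);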