-
Notifications
You must be signed in to change notification settings - Fork 24
/
ex4_largeK_count.R
83 lines (70 loc) · 2.38 KB
/
ex4_largeK_count.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Illustration of cluster growth
# given a Dirichlet-distributed vector
# with K >> N.
# Copyright (C) 2015, Tamara Broderick
# www.tamarabroderick.com
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# useful for sampling from a Dirichlet distribution
library(MCMCpack)
# maximum number of data points to draw
maxN = 1000
# use these parameters by default
K_default = 1000
a_default = 10/K_default
# note: default run with these parameters
# appears at the end
ex4_gen_largeK_count <- function(K,a) {
# Illustrates how number of clusters changes
# with Dirichlet-distributed component probabilities
#
# Args:
# K: Dirichlet parameter vector length
# a: Dirichlet parmeter (will be repeated K times)
#
# Returns:
# Nothing
# make the Dirichlet draw
rhomx = rdirichlet(1,rep(a,K))
# another useful form of rho
rho = as.vector(rhomx)
# records which clusters have been sampled so far
uniq_draws = c()
# cluster samples in order of appearance (ooa)
ooa_clust = c()
for(N in 1:maxN) {
# draw a cluster assignment from the components
draw = sample(1:K, size=1, replace=TRUE, prob=rho)
# update info about cluster draws
uniq_draws = unique(c(uniq_draws, draw))
ooa = which(draw == uniq_draws)
ooa_clust = c(ooa_clust, ooa)
# plot cluster assignments in order of appearance
plot(seq(1,N),
ooa_clust,
xlab="Sample index",
ylab="Cluster by order of appearance",
ylim=c(0,max(10,length(uniq_draws))),
xlim=c(0,max(10,N)),
pch=19,
main=bquote(rho~"~Dirichlet" # ~"("~.(a)~",...,"~.(a)~")"
~", K="~.(K))
)
# Generate a new draw for each press of "enter"
# Press 'x' when finished
line <- readline()
if(line == "x") return("done")
}
}
# default run with default parameters
ex4_gen_largeK_count(K_default, a_default)