-
Notifications
You must be signed in to change notification settings - Fork 0
/
ClusterDiagnostics.Rmd
110 lines (91 loc) · 2.92 KB
/
ClusterDiagnostics.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
---
title: "Cluster diagnostics"
author: "David C. King"
date: "11/26/2022"
output: html_document
---
```{r setup, include=FALSE}
# Show the source of every chunk in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
library(fpc) # for clusterbenchstats (and the other clustering calls below:
# rFace, clusterboot, cqcluster.stats)
library(tidyverse)
# NOTE(review): ggplot2 is already attached by library(tidyverse); this
# explicit call looks redundant but is harmless.
library(ggplot2)
```
```{r data}
# Print numbers with three significant digits from here on.
options(digits = 3)

# Fix the RNG state so the simulated data set is reproducible.
set.seed(20000)

# Simulate a small two-dimensional "face" data set (10 points).
face <- rFace(10, dMoNo = 2, dNoEy = 0, p = 2)
```
```{r fpc-clusterbenchstats-example-1}
# Re-seed and regenerate the data so this chunk is reproducible on its own.
options(digits = 3)
set.seed(20000)
face <- rFace(10, dMoNo = 2, dNoEy = 0, p = 2)

# A clustering method can be listed more than once, as long as each entry
# gets its own parameter list below.
clustermethod <- c("hclustCBI", "hclustCBI")

# One parameter list per entry of `clustermethod`, in the same order.
# Last element of clustermethodpars needs to have an entry!
clustermethodpars <- list(
  list(method = "complete"),
  list(method = "average")
)

# Names passed as `methodname`, one per entry of `clustermethod`.
methodname <- c("complete", "average")

# Benchmark both linkages for 2 and 3 clusters; the *runs arguments are kept
# at 2 each.
cbs <- clusterbenchstats(
  face,
  G = 2:3,
  clustermethod = clustermethod,
  methodname = methodname,
  distmethod = rep(FALSE, 2),
  clustermethodpars = clustermethodpars,
  nnruns = 2,
  kmruns = 2,
  fnruns = 2,
  avenruns = 2
)
```
```{r boostrap}
# Values handed to clusterbenchstats() through its `bootclassif` argument.
bootclassif <- c("centroid", "averagedist")

# Reuses `face`, `clustermethod`, `methodname` and `clustermethodpars` from
# the earlier chunk, this time with the bootstrap options switched on.
cbsboot <- clusterbenchstats(
  face,
  G = 2:3,
  clustermethod = clustermethod,
  methodname = methodname,
  distmethod = rep(FALSE, 2),
  clustermethodpars = clustermethodpars,
  useboot = TRUE,
  bootclassif = bootclassif,
  bootmethod = "nselectboot",
  bootruns = 2,
  nnruns = 1,
  kmruns = 1,
  fnruns = 1,
  avenruns = 1,
  useallg = TRUE
)

print(cbsboot)
```
```{r hclust-diagnostic, eval=FALSE}
# NOTE(review): `dist_euclidean` is not defined in the chunks shown in this
# document, which is presumably why the chunk is marked eval=FALSE; confirm
# where it is meant to come from before enabling evaluation.
bench <- clusterbenchstats(
  dist_euclidean,
  G = 2:5,
  clustermethod = "disthclustCBI",
  scaling = FALSE,
  clustermethodpars = list(list(method = "average")),
  nnruns = 10,
  kmruns = 10,
  fnruns = 10,
  avenruns = 10
)

# Aggregate with weight 1 on positions 1, 6 and 12 of a length-16 weight
# vector and weight 0 everywhere else.
print(
  bench$sstat,
  aggregate = TRUE,
  weights = replace(numeric(16), c(1, 6, 12), 1)
)
```
# clusterboot
```{r cf1}
# Reproducible 50-point face data set; note this overwrites the smaller
# `face` object created in the earlier chunks.
options(digits = 3)
set.seed(20000)
face <- rFace(50, dMoNo = 2, dNoEy = 0, p = 2)

# Run clusterboot() with kmeansCBI (k = 10), B = 10, and three resampling
# schemes passed via `bootmethod`.
cf1 <- clusterboot(
  face,
  B = 10,
  bootmethod = c("boot", "noise", "jitter"),
  clustermethod = kmeansCBI,
  k = 10,
  seed = 15555
)

print(cf1)
plot(cf1)
```
```{r cf2}
# Same idea on a distance matrix: disthclustCBI with method = "average",
# cut = "number" and k = 5, resampled with bootmethod = "subset" (B = 3).
cf2 <- clusterboot(
  dist(face),
  B = 3,
  bootmethod = "subset",
  clustermethod = disthclustCBI,
  k = 5,
  cut = "number",
  method = "average",
  showplots = FALSE,
  seed = 15555
)

print(cf2)
```
```{r d12x}
# Tiny two-column categorical example (four observations).
d1 <- c("a", "b", "a", "c")
d2 <- c("a", "a", "a", "b")
dx <- data.frame(d1, d2)

# datatomatrix = FALSE hands the data frame to claraCBI as it is.
cpx <- clusterboot(
  dx,
  k = 2,
  B = 10,
  clustermethod = claraCBI,
  multipleboot = TRUE,
  usepam = TRUE,
  datatomatrix = FALSE
)

print(cpx)
```
# cqcluster.stats
```{r cqcluster-stats}
# Reproducible 200-point face data set for the cluster-quality statistics.
options(digits = 3)
set.seed(20000)
face <- rFace(200, dMoNo = 2, dNoEy = 0, p = 2)

# Pairwise distances (dist() default metric), then hierarchical clustering
# with hclust()'s default linkage, cut into three clusters.
dface <- dist(face)
complete3 <- cutree(hclust(dface), k = 3)

# Statistics for the 3-cluster solution, with the "grouping" attribute of
# `face` supplied as the alternative clustering.
cqcluster.stats(
  dface,
  complete3,
  alt.clustering = as.integer(attr(face, "grouping"))
)

# Scatter plot of the first two coordinates coloured by the "grouping"
# attribute. V1/V2 are presumably the default column names as.data.frame()
# assigns to a matrix without column names.
df.face <- as.data.frame(face)
df.face$grouping <- attr(face, "grouping")
ggplot(df.face, aes(x = V1, y = V2, color = grouping)) +
  geom_point()
```