Skip to content

Commit

Permalink
* Added Binomial cost function to cpt.meanvar(). Thanks go to Simon T…
Browse files Browse the repository at this point in the history
…aylor for contributing this new cost function.

* Changed default method in cpt.*() functions to PELT
* Fixed bug in SegNeigh algorithm where if the distribution used was Gamma the shape parameter was not passed to the lower functions.  Thanks go to Simon Taylor for highlighting the bug.
  • Loading branch information
rkillick committed Jul 6, 2023
1 parent 5253a05 commit 32d2e6f
Show file tree
Hide file tree
Showing 31 changed files with 551 additions and 83 deletions.
18 changes: 11 additions & 7 deletions DESCRIPTION
@@ -1,13 +1,16 @@
Package: changepoint
Type: Package
Title: Methods for Changepoint Detection
Version: 2.2.5
Date: 2022-11-08
Version: 2.3
Date: 2023-07-06
Authors@R: c(person("Rebecca", "Killick",
role=c("aut","cre"),email="r.killick@lancs.ac.uk"),
person("Kaylea", "Haynes", role="aut"),
person("Idris", "Eckley", role=c("ths")),
person("Paul","Fearnhead",role=c("ctb","ths")), person("Robin", "Long", role="ctb"),person("Jamie","Lee",role="ctr"))
person("Kaylea", "Haynes", role="aut"),
person("Paul","Fearnhead",role=c("ctb","ths")),
person("Robin", "Long", role="ctb"),
person("Simon", "Taylor",role="ctb"),
person("Idris", "Eckley", role=c("ths")),
person("Jamie","Lee",role="ctr"))
Maintainer: Rebecca Killick <r.killick@lancs.ac.uk>
BugReports: https://github.com/rkillick/changepoint/issues
URL: https://github.com/rkillick/changepoint/
Expand All @@ -16,14 +19,15 @@ Depends: R(>= 3.2), methods, stats, zoo(>= 0.9-1)
Suggests: testthat, vdiffr
License: GPL
LazyData: true
Packaged: 2022-11-08 14:14:13 UTC; killick
Packaged: 2023-07-06 14:14:13 UTC; killick
NeedsCompilation: yes
Repository: CRAN
Date/Publication: 2022-11-08 15:50:02 UTC
Author: Rebecca Killick [aut, cre],
Kaylea Haynes [aut],
Idris Eckley [ths],
Paul Fearnhead [ctb, ths],
Robin Long [ctb],
Simon Taylor [ctb],
Idris Eckley [ths],
Jamie Lee [ctr]

7 changes: 7 additions & 0 deletions NEWS
@@ -1,3 +1,10 @@
Version 2.3
===========
* Added Binomial cost function to cpt.meanvar(). Thanks go to Simon Taylor for contributing this new cost function.
* Changed default method in cpt.*() functions to PELT
* Fixed bug in SegNeigh algorithm where if the distribution used was Gamma the shape parameter was not passed to the lower functions. Thanks go to Simon Taylor for highlighting the bug.


Version 2.2.5
=============
* Thanks to Toby Hocking for highlighting that when pen=0, BinSeg doesn't need to warn that ncpts=Q as this is by design. Warning for pen=0 removed.
Expand Down
14 changes: 8 additions & 6 deletions R/CROPS.R
@@ -1,17 +1,19 @@
CROPS <- function(data, penalty="CROPS", pen.value, method="PELT", test.stat="Normal", class=TRUE, param.est=TRUE, minseglen, shape, func){
CROPS <- function(data, penalty="CROPS", pen.value, method="PELT", test.stat="Normal", class=TRUE, param.est=TRUE, minseglen, shape, func, size){
if(method != "PELT"){stop('CROPS is a valid penalty choice only if method="PELT", please change your method or your penalty.')}
mu <- mean(data)
sumstat=cbind(c(0,cumsum(coredata(data))),c(0,cumsum(coredata(data)^2)),cumsum(c(0,(coredata(data)-mu)^2)))


switch(test.stat,
"Normal" = {stat = "norm"},
"Exponential" = {stat = "exp"},
"Gamma" = {stat = "gamma"},
"Poisson" = {stat = "poisson"},
{stop("Only Normal, Exponential, Gamma and Poisson are valid test statistics")}
"Binomial" = {stat = "binomial"},
{stop("Only Normal, Exponential, Gamma, Poisson and Binomial are valid test statistics")}
)
costfunc = paste0(func, ".", stat)

sumstat = SumStats(test.stat, data, size)

out = range_of_penalties(sumstat, cost=costfunc, min_pen=pen.value[1], max_pen=pen.value[2], minseglen=minseglen)

if(func=="var"){
Expand All @@ -23,10 +25,10 @@ CROPS <- function(data, penalty="CROPS", pen.value, method="PELT", test.stat="No
}

if(class==TRUE){
ans = class_input(data=data,cpttype=cpttype, method="PELT", test.stat=test.stat, penalty=penalty, pen.value=pen.value, minseglen=minseglen, param.estimates=param.est, out=out,shape=shape)
ans = class_input(data=data,cpttype=cpttype, method="PELT", test.stat=test.stat, penalty=penalty, pen.value=pen.value, minseglen=minseglen, param.estimates=param.est, out=out,shape=shape,size=size)
if(func=="var"){
param.est(ans)=c(param.est(ans),mean=mu)
}
return(ans)
}else{return(out)}
}
}
12 changes: 8 additions & 4 deletions R/SegNeigh_one_func_minseglen.R
Expand Up @@ -4,14 +4,18 @@ dataToString <- function(data){



SEGNEIGH <- function(data, pen.value, costfunc, Q, var=0, shape=1){
SEGNEIGH <- function(data, pen.value, costfunc, Q, var=0, shape=1,size=NA){

#split the costfunction by . and then use the first two elements in eval(parse())
if(Q<=0){stop(paste('Q is the maximum number of changepoints so should be greater than 0'))}

split=strsplit(costfunc, "\\.")
if(var!=0){
text = c("segneigh",".", split[[1]][1],".", split[[1]][2],"(",dataToString(data),",",Q,",",pen.value,",","know.mean=",TRUE,",","mu=",var,")")
if(split[[1]][2] == "gamma"){
text = c("segneigh",".", split[[1]][1],".", split[[1]][2],"(",dataToString(data),",",shape,",",Q,",",pen.value,")")
}else if(split[[1]][2] == "binomial"){
text = c("segneigh",".", split[[1]][1],".", split[[1]][2],"(",dataToString(data),",",dataToString(size),",",Q,",",pen.value,")")
}else if(var!=0){
text = c("segneigh",".", split[[1]][1],".", split[[1]][2],"(",dataToString(data),",",Q,",",pen.value,",","know.mean=",TRUE,",","mu=",var,")")
}else{
text = c("segneigh",".", split[[1]][1],".", split[[1]][2],"(",dataToString(data),",",Q,",",pen.value,")")
}
Expand All @@ -21,4 +25,4 @@ SEGNEIGH <- function(data, pen.value, costfunc, Q, var=0, shape=1){
out = eval(parse(text=paste(text, collapse="")))

return(out)
}
}
273 changes: 273 additions & 0 deletions R/binomial.R
@@ -0,0 +1,273 @@
cpt.meanvar.binom <- function(data,penalty="MBIC",pen.value=0,method="PELT",Q=5,class=TRUE,
param.estimates=TRUE,minseglen=2,size=NA){
##Checks already performed by cpt.meanvar:
# data contains no NAs
# method=="SegNeigh" & minseglen>2 ==> stop()
if(is.na(size)){
stop("For Binomial cost functions the size argument must be specified.")
}
if(method=="AMOC"){
return(single.meanvar.binomial(data, size, penalty, pen.value, class,
param.estimates,minseglen))
}else if(method=="PELT" || method=="BinSeg" || method=="SegNeigh"){
return(multiple.meanvar.binomial(data, size, mul.method=method,penalty,
pen.value,Q,class,param.estimates,minseglen))
}else{
stop("Invalid Method, must be AMOC, PELT, SegNeigh or BinSeg")
}
}

single.meanvar.binomial <- function(data, size, penalty="MBIC",pen.value=0,class=TRUE,
param.estimates=TRUE,minseglen){
##checks on data & size
if(any(data<0) || any(size<0)) stop("data and size needs to be positive.")
if(any(data%%1!=0) || any(size%%1!=0)) stop("data and size needs to be integers.")
if(length(size)==1){
SIZE <- data*0 + size ## repeat size to the dimension of data & inherit class
}else if(length(data)==length(size)){
if(!is.numeric(size)) stop("Only numeric size allowed")
if(anyNA(size)){stop("Missing value: NA is not allowed in size.")}
SIZE <- size
}else{
stop("Size needs to be either a single number or have the same dimensions as data.")
}
if(any(data > SIZE)) stop("Elements of data cannot be greater than the corresponding element of size.")
if(is.null(dim(data))){
#single dataset
n <- length(data)
}else{
n <- nrow(data)
}
if(n<4) stop("Data must have at least 4 observations to fit this changepoint model.")
if(n<(2*minseglen)) stop("Minimum segment length is too large to include a changepoint in this data")

pen.value = penalty_decision(penalty, pen.value, n, diffparam=1, asymcheck="meanvar.binomial",method="AMOC")
if(is.null(dim(data))){
#single dataset
tmp = single.meanvar.binomial.calc(coredata(data),coredata(SIZE),extrainf=TRUE,minseglen)
if(penalty=="MBIC"){
tmp[3] = tmp[3] + log(tmp[1]) + log(n-tmp[1]+1)
}
ans = decision(tmp[1],tmp[2],tmp[3],penalty,n,diffparam=1,pen.value)
if(class){
return(class_input(data, size=SIZE, method="AMOC",
penalty=penalty, pen.value=ans$pen,
minseglen=minseglen, param.estimates=param.estimates,
out = c(0,ans$cpt)))
}else{
return(ans$cpt)
}
}else{
#multiple datasets
tmp = single.meanvar.binomial.calc(data,SIZE,extrainf=TRUE,minseglen)
if(penalty=="MBIC"){
tmp[,3] = tmp[,3] + log(tmp[,1]) + log(n-tmp[,1]+1)
}
ans = decision(tmp[,1],tmp[,2],tmp[,3],penalty,n,diffparam=1,pen.value)
if(class){
rep = ncol(data)
out = list()
for(i in 1:rep){
if(length(size)==1){ size_out<- size }else{ size_out<-size[,i] }
# the above looks repetitive from line 25 but 25 is generic and 71 is specific to multivariate
out[[i]] <- class_input(data[,i],size=size_out,
method="AMOC", penalty=penalty,
pen.value=ans$pen, minseglen=minseglen,
param.estimates=param.estimates, out = c(0,ans$cpt[i]))
}
return(out)
}else{
return(ans$cpt)
}
}
}



single.meanvar.binomial.calc <- function(data,size,extrainf=TRUE,minseglen){
singledim <- function(data,size,extrainf=TRUE,minseglen){
n = length(data)
y = c(0,cumsum(data))
m = c(0,cumsum(size))
if(y[n+1] == 0 || y[n+1]==m[n+1]){
null = Inf
}else{
null=-2*(y[n+1]*log(y[n+1])-m[n+1]*log(m[n+1]) + (m[n+1]-y[n+1])*log(m[n+1]-y[n+1]))
}
taustar = minseglen:(n-minseglen)
sx1 = y[taustar+1]
sm1 = m[taustar+1]
sx1_sm1 = sm1-sx1
sx2 = y[n+1]-y[taustar+1]
sm2 = m[n+1]-m[taustar+1]
sx2_sm2 = sm2-sx2
tmp = sx1*log(sx1) - sm1*log(sm1) + sx1_sm1*log(sx1_sm1)
tmp = tmp + sx2*log(sx2) - sm2*log(sm2) + sx2_sm2*log(sx2_sm2)
##NOTE:
## probMLE = c(rep(sx1/sm1,taustar[i]),rep(sx2/sm2,n-taustar[i]))
## sum(dbinom(data,size,probMLE,log=TRUE)) == tmp + sum(lchoose(size,data))

tmp = -2*tmp #convert to deviance scale
if(anyNA(tmp)) tmp[is.na(tmp)] = Inf
tau=which(tmp==min(tmp,na.rm=T))[1]
taulike = tmp[tau]
tau = tau + minseglen - 1 ## correcting for the fact that we are starting at minseglen
if(extrainf){
out <- c(tau,null,taulike)
names(out) = c("cpt","null","alt")
return(out)
}else{
return(tau)
}
}

if(is.null(dim(data))){
cpt = singledim(data,size,extrainf,minseglen)
}else{
rep = nrow(data)
n = ncol(data)
if(!extrainf){
cpt=rep(0,rep)
for(i in 1:rep){
cpt[i] <- singledim(data[i,], size[i,], extrainf, minseglen)
}
}else{
cpt = matrix(0,ncol=3,nrow=rep)
for(i in 1:rep){
cpt[i,] = singledim(data[i,], size[i,], extrainf, minseglen)
}
colnames(cpt) <- c("cpt","null","alt")
}
return(cpt)
}
}

multiple.meanvar.binomial <- function(data, size, mul.method="PELT",penalty="MBIC",
pen.value=0,Q=5,class=TRUE,param.estimates=TRUE,minseglen){
##checks on data & size
if(length(size)==1){
SIZE <- data*0 + size ## repeat size to the dimension of data & inherit class
}else if(length(data)==length(size)){
if(!is.numeric(size)) stop("Only numeric size allowed")
if(anyNA(size)){stop("Missing value: NA is not allowed in the size as changepoint methods are only sensible for regularly spaced data.")}
SIZE <- size
}else{
stop("Size needs to be either a single number or have the same dimensions of data.")
}
if(any(data<0) || any(size<0)) stop("Binomal test statistic requires postive data and size.")
if(any(data%%1!=0) || any(size%%1!=0)) stop("Binomal test statistic requires integer data and size.")
if(any(data > SIZE)) stop("Binomal test statistic requires data <= size.")

costfunc = "meanvar.binomial"
if(penalty == "MBIC"){
if(mul.method == "SegNeigh"){
stop("MBIC penalty not implemented for SegNeigh method, please choose an alternative penalty")
}
costfunc = paste0(costfunc,".mbic")
}

diffparam = 1
if(is.null(dim(data))){
n = length(data)
}else{
n = ncol(data)
}

if(n<(2*minseglen)) stop("Minimum segment length is too large to include a change in this data")
pen.value = penalty_decision(penalty, pen.value, n, diffparam = 1, asymcheck = costfunc, method = mul.method)
if(is.null(dim(data))){
out = data_input(data=data,method=mul.method, pen.value=pen.value, costfunc=costfunc,
minseglen=minseglen, Q=Q, size=size)
if(class){
return(class_input(data=data, size=SIZE, method=mul.method,
penalty=penalty, pen.value=pen.value,
minseglen=minseglen, param.estimates=param.estimates,
out = out, Q=Q))
}else{
return(out[[2]])
}
}else{
rep = nrow(data)
out = list()
for(i in 1:rep){
out[[i]] = data_input(data=data[i,],method=mul.method, pen.value=pen.value,
costfunc=costfunc, minseglen=minseglen, Q=Q, size=size[i,])
}
cpts = lapply(out,'[[',2)
if(class){
ans = list()
for(i in 1:rep){
if(length(size)==1){ size_out<- size }else{ size_out<-size[i,] }
ans[[i]] = class_input(data=data[i,],size=size_out,
method=mul.method, penalty=penalty,
pen.value=pen.value, minseglen=minseglen,
param.estimates=param.estimates,
out = out[[i]], Q=Q)
}
return(ans)
}else{
return(cpts)
}
}
}

segneigh.meanvar.binomial <- function(data, size=1, Q=5, pen=0){
##nb size is the same dimension of data
##checks on data and size should have already been performed
n <- length(data)
if(n<4){stop('Data must have atleast 4 observations to fit a changepoint model.')}
if(Q>((n/2)+1)){stop(paste('Q is larger than the maximum number of segments',(n/2)+1))}

all.seg=matrix(0,ncol=n,nrow=n)
if(length(size)==1){ SIZE <- data*0 + size }else{SIZE <- size}
for(i in 1:n){
sumx=0
sumsize = 0
for(j in i:n){
sumx=sumx+data[j]
sumsize = sumsize + SIZE[j]
if(sumx==0 || sumx==sumsize){
all.seg[i,j]=-Inf
}
else{
all.seg[i,j]=sumx*log(sumx) - sumsize*log(sumsize) + (sumsize-sumx)*log(sumsize-sumx)
}
}
}
like.Q=matrix(0,ncol=n,nrow=Q)
like.Q[1,]=all.seg[1,]
cp=matrix(NA,ncol=n,nrow=Q)
for(q in 2:Q){
for(j in q:n){
like=NULL
if((j-2-q)<0){v=q}
else{v=(q):(j-2)}
like=like.Q[q-1,v]+all.seg[v+1,j]

like.Q[q,j]= max(like,na.rm=TRUE)
cp[q,j]=which(like==max(like,na.rm=TRUE))[1]+(q-1)
}

}
cps.Q=matrix(NA,ncol=Q,nrow=Q)
for(q in 2:Q){
cps.Q[q,1]=cp[q,n]
for(i in 1:(q-1)){
cps.Q[q,(i+1)]=cp[(q-i),cps.Q[q,i]]
}
}

op.cps=NULL
k=0:(Q-1)

for(i in 1:length(pen)){
criterion=-2*like.Q[,n]+k*pen[i]

op.cps=c(op.cps,which(criterion==min(criterion,na.rm=T))-1)
}
if(op.cps==(Q-1)){warning('The number of segments identified is Q, it is advised to increase Q to make sure changepoints have not been missed.')}
if(op.cps==0){cpts=n}
else{cpts=c(sort(cps.Q[op.cps+1,][cps.Q[op.cps+1,]>0]),n)}

return(list(cps=t(apply(cps.Q,1,sort,na.last=TRUE)),cpts=cpts,op.cpts=op.cps,pen=pen,like=criterion[op.cps+1],like.Q=like.Q[,n]))
}

0 comments on commit 32d2e6f

Please sign in to comment.