diff --git a/build.bash b/build.bash new file mode 100644 index 0000000..97a86b0 --- /dev/null +++ b/build.bash @@ -0,0 +1,32 @@ +#!/bin/bash + +R=R +if [ ${#} -gt 0 ]; then + if [ "$1" = "-dev" ]; then + R=Rdev + fi +fi + +echo "######## Removing building information..." +rm -rf output + +echo "######## Copying DESCRIPTION and NAMESPACE to pkg directory..." +cp build/DESCRIPTION pkg +cp build/NAMESPACE pkg + +echo "######## Generate documentation..." +$R -q -f roxygen.R + +echo "######## Building package in output..." +mkdir output +cd output +$R CMD build ../pkg +echo "######## Testing package..." +for x in *.tar.gz +do + $R CMD check --as-cran $x +done + +echo "**BUILT USING $R" +$R --version + diff --git a/build/DESCRIPTION b/build/DESCRIPTION new file mode 100644 index 0000000..64ff506 --- /dev/null +++ b/build/DESCRIPTION @@ -0,0 +1,14 @@ +Package: extremevalues +Maintainer: Mark van der Loo +License: GPL-3 +Title: Univariate outlier detection using model distributions. +LazyData: no +Type: Package +LazyLoad: yes +Author: Mark van der Loo +Description: hallo +Version: 3.0-0 +Depends: R (>= 2.13.0) +URL: https://github.com/markvanderloo/rspa +Date: 2012-07-15 +Suggests: knitr, testthat diff --git a/build/NAMESPACE b/build/NAMESPACE new file mode 100644 index 0000000..e69de29 diff --git a/document.bash b/document.bash new file mode 100644 index 0000000..d1159f6 --- /dev/null +++ b/document.bash @@ -0,0 +1,6 @@ +#!/bin/bash + +R -f roxygen.R +R CMD Rd2pdf --force --no-preview -o manual.pdf ./pkg + + diff --git a/examples/plotpositions.R b/examples/plotpositions.R new file mode 100644 index 0000000..ce1bfa3 --- /dev/null +++ b/examples/plotpositions.R @@ -0,0 +1,10 @@ + +# simple plot positions +plotpositions(c(1,3,5)) + +# plot positions: duplicates get mean position +plotpositions(c(1,3,5,3)) + +# plot positions: duplicates get original position +plotpositions(c(1,3,5,3),duplicates='asis') + diff --git a/pkg/NEWS b/pkg/NEWS new file mode 100644 index 0000000..2bb0049 --- /dev/null +++ b/pkg/NEWS @@ -0,0 +1,33 @@ +version 3.0-0 +- BREAKING CHANGE: Complete rewrite of the package. +- plot positions of duplicates now get mean plot position by default. + +version 2.2 +- Fixed minor bug in namespace file (thanks to Duncan Murdoch for pointing that out) +- Changed dependancy on gWidgets and gWidgetstcltk to "suggest" for faster loading. +- Changed documentation of returned R2-value (thanks to Charles Lobos for useful discussion and remarks) +- Changed calculation of variance of residuals for method II (slightly underestimated previously, thanks to Javier Orche Galindo for noting this) +- Fixed a bug causing extimated parameters in method II/pareto not to be passed to output. + +version 2.1 +- Killed a bug in normal distribution / method II +- Updated references, some typo's in the man pages +- Added version 0.1 of GUI interface +- Small stuff in helpfiles + +version 2.0 +- getOutliers.r - Changed rho default, added left limit, added some input + checks +- getLognormalLimit.r - added left limit +- getExponentialLimit.r - added left limit +- getWeibullLimit.r - added left limit +- getNormalLimit.r - added left limit +- getParetoLimit.r - added left limit +- Added Method II +- Renaming main functions +- Revised and extended plotting capabilities +- Major overhaul of the help files +- Fixed a bug in the exponential fit +- Changed to Makkonen's equation for plot positions + + diff --git a/pkg/R/.R b/pkg/R/.R new file mode 100644 index 0000000..7454357 --- /dev/null +++ b/pkg/R/.R @@ -0,0 +1,6 @@ + + +source('plotpositions.R') + + + diff --git a/pkg/R/.plotpositions.R.swp b/pkg/R/.plotpositions.R.swp new file mode 100644 index 0000000..65de6d5 Binary files /dev/null and b/pkg/R/.plotpositions.R.swp differ diff --git a/pkg/R/extremevalues.R b/pkg/R/extremevalues.R new file mode 100644 index 0000000..f76c79d --- /dev/null +++ b/pkg/R/extremevalues.R @@ -0,0 +1,6 @@ +#' A package for univariate outlier detection +#' +#' @name extremevalues +#' @docType package +#' @useDynLib extremevalues +{} diff --git a/pkg/R/plotpositions.R b/pkg/R/plotpositions.R new file mode 100644 index 0000000..2ed2d06 --- /dev/null +++ b/pkg/R/plotpositions.R @@ -0,0 +1,44 @@ + +all_finite <- function(x){ + stopifnot(is.numeric(x)) + storage.mode(x) <- "double" + .Call("all_finite_double",x) +} + +#' Compute QQ-plotpositions +#' +#' @section Details: +#' The QQ-plot indices are computed as as follows. For each \eqn{x_i} the rank \eqn{j_i} is determined. +#' The plot index for \eqn{x_i} is then given by \eqn{i/(N+1)}. When \eqn{\boldsymbol{x}} has duplicate +#' entries, plot indices for the duplicates are computed for each duplicate and replaced with the mean (by default). +#' Passing \code{duplicates='asis'}, the original rank is used. +#' +#' +#' @param x numeric vector. An error is thrown when \code{x} contains nonnumerical or nonfinite values. +#' @param duplicates A character constant determining how duplicate entries are indexed (see details). +#' @return plot positions \eqn{0< p < 1} for the values in \code{x}. +#' @export +#' @example ../examples/plotpositions.R +#' +plotpositions <- function(x,duplicates=c('mean','asis')){ + stopifnot( + is.numeric(x), + all_finite(x) + ) + duplicates <- match.arg(duplicates) + N <- length(x) + i <- order(x) + if (identical(duplicates,'asis')){ + pp <- (1:N)/(N+1) + return(pp[i]) + } else { + storage.mode(x) <- "double" + pp <- .Call("R_plotpositions",x[i]) + } + pp[i] +} + + + + + diff --git a/pkg/inst/testPlotpositions.R b/pkg/inst/testPlotpositions.R new file mode 100644 index 0000000..bdc6d6f --- /dev/null +++ b/pkg/inst/testPlotpositions.R @@ -0,0 +1,22 @@ + +library(testthat) + +context('QQ-plot positions') +test_that('plot postion results', { + expect_equal( plotpositions(1), 0.5 ) + expect_equal( plotpositions(c(1,1), c(0.5,0.5))) + expect_equal( + round(plotpositions(c(1,2)),8), + round(c(1/3,2/3),8) + ) + expect_equal( + round(plotpositions(1,1,duplicates='asis'),8), + round(c(1/3,2/3),8) + ) +}) + +test_that('plot position errors',{ + expect_error( plotposition(NA) ) + expect_error( plotposition(NaN) ) +}) + diff --git a/pkg/src/.fuse_hidden0000082800000002 b/pkg/src/.fuse_hidden0000082800000002 new file mode 100644 index 0000000..26ca1c4 Binary files /dev/null and b/pkg/src/.fuse_hidden0000082800000002 differ diff --git a/pkg/src/R_all_finite.c b/pkg/src/R_all_finite.c new file mode 100644 index 0000000..d426bda --- /dev/null +++ b/pkg/src/R_all_finite.c @@ -0,0 +1,25 @@ + +#include +#include + +SEXP all_finite_double(SEXP x){ + PROTECT(x); + double *xx = REAL(x); + + SEXP y; + PROTECT(y = allocVector(LGLSXP,1)); + + int i, b = 1; + for (i=0; i +#include +#include "plotpositions.h" + +SEXP R_plotpositions(SEXP x){ + PROTECT(x); + + SEXP I; + PROTECT(I = allocVector(REALSXP, length(x))); + + plotpositions(REAL(x), length(x), REAL(I)); + + UNPROTECT(2); + + return I; +} + + + + diff --git a/pkg/src/plotpositions.c b/pkg/src/plotpositions.c new file mode 100644 index 0000000..751bb52 --- /dev/null +++ b/pkg/src/plotpositions.c @@ -0,0 +1,55 @@ + +// Sum of sequence i, i+1,...,j. +// - "double" arithmetic is used to avoid integer overflow. +double seq_sum(int i, int j){ + int k = ( (j - i) + 1 ); + int m = k / 2; + double u, di = (double) i, dj = (double) j; + if ( k % 2 == 0 ){ + u = m * (dj + di); + } else { + u = m * (dj + di ) + dj - m; + } + return u; +} + + +// QQ-plot positions, replacing double entries with the mean position. +// - Positions determined as described in Makkonen (2008). +// - It is assumed that x is sortend in ascending order! +void plotpositions(double *x, int n, double *I){ + int i = 0, j, k = 1; + double m, pp; + int N = n + 1; + while( i < n ){ + // find number (m) and index (i,i+1,...,j) of duplicates + j = i+1; + while ( x[j] == x[i] ){ + j++; + }; + m = (double) j-i; + // compute mean QQ-plot position and assign + pp = seq_sum(i+1,j)/(m*N); + for ( k = i; k < j; k++ ){ + I[k] = pp; + } + // shift index + i = j; + } + +} + +/* example: uncomment to run. +#include +void main(){ + double x[4] = {1.0,3.0,3.0,4.0}; + double I[4]; + + plotpositions(x, 4, I); + for (int i=0; i<4; i++){ + printf("%4.2f, %4.2f", x[i], I[i]); + printf("\n"); + } + +} +*/ diff --git a/pkg/src/plotpositions.h b/pkg/src/plotpositions.h new file mode 100644 index 0000000..87613e1 --- /dev/null +++ b/pkg/src/plotpositions.h @@ -0,0 +1,10 @@ + +#ifndef ev_plotpositions_h +#define ev_plotpositions_h + +void plotpositions(double *, int, double *); + +#endif + + + diff --git a/pkg/tests/testpackage.R b/pkg/tests/testpackage.R new file mode 100644 index 0000000..654c58b --- /dev/null +++ b/pkg/tests/testpackage.R @@ -0,0 +1,5 @@ + +if ( require(testthat) ) + test_package("rspa") + + diff --git a/roxygen.R b/roxygen.R new file mode 100644 index 0000000..4854ce8 --- /dev/null +++ b/roxygen.R @@ -0,0 +1,14 @@ +library(roxygen2) +options(error=traceback) +unlink( 'pkg/man', TRUE) + +setwd('pkg') +roxygenize( '.' + , roxygen.dir='.' + , copy.package=FALSE + , unlink.target=TRUE + ) + +if (length(list.files('inst/doc')) == 0){ + unlink( 'inst/doc', TRUE) +} diff --git a/test/autotest.R b/test/autotest.R new file mode 100644 index 0000000..5225ce2 --- /dev/null +++ b/test/autotest.R @@ -0,0 +1,7 @@ +library(testthat) + + + +dyn.load("../pkg/src/plotpositions.so") +auto_test("../pkg/R", "../pkg/inst/tests") +