add check to replace . with _ in alleles

This is to fix #132
thibautjombart · Mar 1, 2016 · f33c01b · f33c01b
1 parent 66273bd
commit f33c01b
Showing 1 changed file with 60 additions and 44 deletions.
diff --git a/R/import.R b/R/import.R
@@ -19,76 +19,81 @@
 ## Function df2genind
 ######################
 #' Convert a data.frame of allele data to a genind object.
-#'
-#' The function \code{df2genind} converts a data.frame (or a matrix) into a
-#' \linkS4class{genind} object. The data.frame must meet the following
-#' requirements:\cr
-#' - genotypes are in row (one row per genotype)\cr
-#' - markers/loci are in columns\cr
-#' - each element is a string of characters coding alleles, ideally separated by a character string (argument \code{sep});
-#' if no separator is used, the number of characters coding alleles must be indicated (argument \code{ncode}).\cr
-#'
-#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to such a
-#' data.frame.
-#'
-#' === Details for the \code{sep} argument ===\cr this character is directly
-#' used in reguar expressions like \code{gsub}, and thus require some
-#' characters to be preceeded by double backslashes. For instance, "/" works
-#' but "|" must be coded as "\\|".
-#'
+#' 
+#' The function \code{df2genind} converts a data.frame (or a matrix) into a 
+#' \linkS4class{genind} object. The data.frame must meet the following 
+#' requirements:
+#' \itemize{
+#' \item genotypes are in row (one row per genotype)
+#' \item markers/loci are in columns
+#' \item each element is a string of characters coding alleles, ideally
+#' separated by a character string (argument \code{sep}); if no separator is
+#' used, the number of characters coding alleles must be indicated (argument
+#' \code{ncode}).}
+#' 
+#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to
+#' such a data.frame.
+#' 
+#' === Details for the \code{sep} argument ===\cr this character is directly 
+#' used in reguar expressions like \code{gsub}, and thus require some characters
+#' to be preceeded by double backslashes. For instance, "/" works but "|" must
+#' be coded as "\\|".
+#' 
 #' @aliases df2genind
-#' @param X a matrix or a data.frame containing allelle data only (see
+#' @param X a matrix or a data.frame containing allelle data only (see 
 #'   decription)
 #' @param sep a character string separating alleles. See details.
-#' @param ncode an optional integer giving the number of characters used for
-#'   coding one genotype at one locus. If not provided, this is determined from
+#' @param ncode an optional integer giving the number of characters used for 
+#'   coding one genotype at one locus. If not provided, this is determined from 
 #'   data.
-#' @param ind.names optinal, a vector giving the individuals names; if NULL, taken
-#' from rownames of X. If factor or numeric, vector is converted to character.
-#' @param loc.names an optional character vector giving the markers names; if
+#' @param ind.names optinal, a vector giving the individuals names; if NULL,
+#'   taken from rownames of X. If factor or numeric, vector is converted to
+#'   character.
+#' @param loc.names an optional character vector giving the markers names; if 
 #'   NULL, taken from colnames of X.
 #' @param pop an optional factor giving the population of each individual.
-#' @param NA.char a character string corresponding to missing allele (to be treated as NA)
+#' @param NA.char a character string corresponding to missing allele (to be
+#'   treated as NA)
 #' @param ploidy an integer indicating the degree of ploidy of the genotypes.
-#' @param type a character string indicating the type of marker: 'codom' stands
-#'   for 'codominant' (e.g. microstallites, allozymes); 'PA' stands for
+#' @param type a character string indicating the type of marker: 'codom' stands 
+#'   for 'codominant' (e.g. microstallites, allozymes); 'PA' stands for 
 #'   'presence/absence' markers (e.g. AFLP, RAPD).
-#' @param strata an optional data frame that defines population stratifications
-#'   for your samples. This is especially useful if you have a hierarchical or
+#' @param strata an optional data frame that defines population stratifications 
+#'   for your samples. This is especially useful if you have a hierarchical or 
 #'   factorial sampling design.
 #' @param hierarchy a hierarchical formula that explicitely defines hierarchical
 #'   levels in your strata. see \code{\link{hierarchy}} for details.
-#'
-#' @return an object of the class \linkS4class{genind} for \code{df2genind}; a
-#' matrix of biallelic genotypes for \code{genind2df}
-#'
-#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}, Zhian N. Kamvar
+#'   
+#' @return an object of the class \linkS4class{genind} for \code{df2genind}; a 
+#'   matrix of biallelic genotypes for \code{genind2df}
+#'   
+#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}, Zhian N. Kamvar 
 #'   \email{kamvarz@@science.oregonstate.edu}
-#'
-#' @seealso \code{\link{genind2df}}, \code{\link{import2genind}},
-#'   \code{\link{read.genetix}}, \code{\link{read.fstat}},
+#'   
+#' @seealso \code{\link{genind2df}}, \code{\link{import2genind}}, 
+#'   \code{\link{read.genetix}}, \code{\link{read.fstat}}, 
 #'   \code{\link{read.structure}}
-#'
+#'   
 #' @keywords manip
 #' @examples
-#'
+#' 
 #' ## simple example
 #' df <- data.frame(locusA=c("11","11","12","32"),
 #' locusB=c(NA,"34","55","15"),locusC=c("22","22","21","22"))
 #' row.names(df) <- .genlab("genotype",4)
 #' df
-#'
+#' 
 #' obj <- df2genind(df, ploidy=2, ncode=1)
 #' obj
 #' tab(obj)
-#'
-#'
+#' 
+#' 
 #' ## converting a genind as data.frame
 #' genind2df(obj)
 #' genind2df(obj, sep="/")
-#'
+#' 
 #' @export
-#'
+#' 
 df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
                       pop=NULL, NA.char="", ploidy=2, type=c("codom","PA"),
                       strata = NULL, hierarchy = NULL){
@@ -161,7 +166,18 @@ df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
         if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
         pop <- as.factor(pop)
     }
-
+
+    ## check alleles for periods
+    if (length(grep("[.]", X)) > 0L){
+      if (is.null(sep) || sep != "_"){
+        warning("character '.' detected in names of loci; replacing with '_'")
+        replacement <- "_"
+      } else {
+        warning("character '.' detected in names of loci; replacing with 'p'")
+        replacement <- "p"
+      }
+      X <- apply(X, 2, function(i) gsub("[.]", replacement, i))
+    }
 
     ## PRESENCE/ABSENCE MARKERS ##
     if(toupper(type)=="PA"){