Skip to content

Commit

Permalink
add check to replace . with _ in alleles
Browse files Browse the repository at this point in the history
This is to fix #132
  • Loading branch information
zkamvar committed Mar 1, 2016
1 parent 66273bd commit f33c01b
Showing 1 changed file with 60 additions and 44 deletions.
104 changes: 60 additions & 44 deletions R/import.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,76 +19,81 @@
## Function df2genind
######################
#' Convert a data.frame of allele data to a genind object.
#'
#' The function \code{df2genind} converts a data.frame (or a matrix) into a
#' \linkS4class{genind} object. The data.frame must meet the following
#' requirements:\cr
#' - genotypes are in row (one row per genotype)\cr
#' - markers/loci are in columns\cr
#' - each element is a string of characters coding alleles, ideally separated by a character string (argument \code{sep});
#' if no separator is used, the number of characters coding alleles must be indicated (argument \code{ncode}).\cr
#'
#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to such a
#' data.frame.
#'
#' === Details for the \code{sep} argument ===\cr this character is directly
#' used in regular expressions like \code{gsub}, and thus requires some
#' characters to be preceded by double backslashes. For instance, "/" works
#' but "|" must be coded as "\\|".
#'
#'
#' The function \code{df2genind} converts a data.frame (or a matrix) into a
#' \linkS4class{genind} object. The data.frame must meet the following
#' requirements:
#' \itemize{
#' \item genotypes are in row (one row per genotype)
#' \item markers/loci are in columns
#' \item each element is a string of characters coding alleles, ideally
#' separated by a character string (argument \code{sep}); if no separator is
#' used, the number of characters coding alleles must be indicated (argument
#' \code{ncode}).}
#'
#' See \code{\link{genind2df}} to convert \linkS4class{genind} objects back to
#' such a data.frame.
#'
#' === Details for the \code{sep} argument ===\cr this character is directly
#' used in regular expressions like \code{gsub}, and thus requires some
#' characters to be preceded by double backslashes. For instance, "/" works but
#' "|" must be coded as "\\|".
#'
#' @aliases df2genind
#' @param X a matrix or a data.frame containing allele data only (see
#' @param X a matrix or a data.frame containing allele data only (see
#' description)
#' @param sep a character string separating alleles. See details.
#' @param ncode an optional integer giving the number of characters used for
#' coding one genotype at one locus. If not provided, this is determined from
#' @param ncode an optional integer giving the number of characters used for
#' coding one genotype at one locus. If not provided, this is determined from
#' data.
#' @param ind.names optional, a vector giving the individuals names; if NULL, taken
#' from rownames of X. If factor or numeric, vector is converted to character.
#' @param loc.names an optional character vector giving the markers names; if
#' @param ind.names optional, a vector giving the individuals names; if NULL,
#' taken from rownames of X. If factor or numeric, vector is converted to
#' character.
#' @param loc.names an optional character vector giving the markers names; if
#' NULL, taken from colnames of X.
#' @param pop an optional factor giving the population of each individual.
#' @param NA.char a character string corresponding to missing allele (to be treated as NA)
#' @param NA.char a character string corresponding to missing allele (to be
#' treated as NA)
#' @param ploidy an integer indicating the degree of ploidy of the genotypes.
#' @param type a character string indicating the type of marker: 'codom' stands
#' for 'codominant' (e.g. microsatellites, allozymes); 'PA' stands for
#' @param type a character string indicating the type of marker: 'codom' stands
#' for 'codominant' (e.g. microsatellites, allozymes); 'PA' stands for
#' 'presence/absence' markers (e.g. AFLP, RAPD).
#' @param strata an optional data frame that defines population stratifications
#' for your samples. This is especially useful if you have a hierarchical or
#' @param strata an optional data frame that defines population stratifications
#' for your samples. This is especially useful if you have a hierarchical or
#' factorial sampling design.
#' @param hierarchy a hierarchical formula that explicitly defines hierarchical
#' levels in your strata. See \code{\link{hierarchy}} for details.
#'
#' @return an object of the class \linkS4class{genind} for \code{df2genind}; a
#' matrix of biallelic genotypes for \code{genind2df}
#'
#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}, Zhian N. Kamvar
#'
#' @return an object of the class \linkS4class{genind} for \code{df2genind}; a
#' matrix of biallelic genotypes for \code{genind2df}
#'
#' @author Thibaut Jombart \email{t.jombart@@imperial.ac.uk}, Zhian N. Kamvar
#' \email{kamvarz@@science.oregonstate.edu}
#'
#' @seealso \code{\link{genind2df}}, \code{\link{import2genind}},
#' \code{\link{read.genetix}}, \code{\link{read.fstat}},
#'
#' @seealso \code{\link{genind2df}}, \code{\link{import2genind}},
#' \code{\link{read.genetix}}, \code{\link{read.fstat}},
#' \code{\link{read.structure}}
#'
#'
#' @keywords manip
#' @examples
#'
#'
#' ## simple example
#' df <- data.frame(locusA=c("11","11","12","32"),
#' locusB=c(NA,"34","55","15"),locusC=c("22","22","21","22"))
#' row.names(df) <- .genlab("genotype",4)
#' df
#'
#'
#' obj <- df2genind(df, ploidy=2, ncode=1)
#' obj
#' tab(obj)
#'
#'
#'
#'
#' ## converting a genind as data.frame
#' genind2df(obj)
#' genind2df(obj, sep="/")
#'
#'
#' @export
#'
#'
df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
pop=NULL, NA.char="", ploidy=2, type=c("codom","PA"),
strata = NULL, hierarchy = NULL){
Expand Down Expand Up @@ -161,7 +166,18 @@ df2genind <- function(X, sep=NULL, ncode=NULL, ind.names=NULL, loc.names=NULL,
if(length(pop)!= n) stop("length of factor pop differs from nrow(X)")
pop <- as.factor(pop)
}


## check alleles for periods
if (length(grep("[.]", X)) > 0L){
if (is.null(sep) || sep != "_"){
warning("character '.' detected in names of loci; replacing with '_'")
replacement <- "_"
} else {
warning("character '.' detected in names of loci; replacing with 'p'")
replacement <- "p"
}
X <- apply(X, 2, function(i) gsub("[.]", replacement, i))
}

## PRESENCE/ABSENCE MARKERS ##
if(toupper(type)=="PA"){
Expand Down

0 comments on commit f33c01b

Please sign in to comment.