Skip to content

Commit

Permalink
Added documentation files (man/*.Rd).
Browse files Browse the repository at this point in the history
  • Loading branch information
pcarbo committed Jul 15, 2013
1 parent 4fc5dd2 commit 9b09bea
Show file tree
Hide file tree
Showing 26 changed files with 1,522 additions and 0 deletions.
96 changes: 96 additions & 0 deletions man/aicVC.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
\name{aicVC}
\alias{aicVC}
\title{
AIC Model Selection
}
\description{
Select genetic variance components via Akaike's information criterion (AIC).
}
\usage{
aicVC(y,x,v=vector("list",6),initpar,k=2,init=6,keep=6,
direction=c("forward","backward"),nit=25,verbose=FALSE,
method=c("Nelder-Mead","BFGS","CG","SANN"),control=list(),
hessian=FALSE)
}
\arguments{
\item{y}{
a numeric vector or a numeric matrix of one column (representing a phenotype for instance).
}
\item{x}{
a data frame or matrix, representing covariates if not missing.
}
\item{v}{
a list of variance components (AA, DD, HH, AD, MH, EE, ...) where "AA" and "DD" are respectively additive and dominance genetic matrices, "HH", "AD" and "MH" are other genetic matrices that one may be interested in (see "details"), "EE" is the residual matrix that is usually assumed to be an identity matrix, and "..." are other random components of interest. If a genetic component is not considered, it should be set to NULL.
}
\item{initpar}{
optional initial parameter values.
}
\item{k}{
penalty on a parameter. The selection criterion is the known "AIC" if \code{k = 2} and is "BIC" if \code{k = log(n)} where "n" is the sample size.
}
\item{init}{
indicator of the initial model pertaining to genetic variance components. For instance, c(1,2,6) indicates the initial model includes "AA", "DD" and "EE". By default, only "EE" is considered.
}
\item{keep}{
indicator of which variance components should be forced into the final model. The default is "EE".
}
\item{direction}{
the mode of search. Either "forward" or "backward" with default "forward".
}
\item{nit}{
number of iterations to call \code{\link{optim}} for optimization.
}
\item{verbose}{
a logical variable. True if ones wants to track the process for monitoring purpose.
}
\item{method}{
the optimization method to be used. See \code{\link{optim}} for details.
}
\item{control}{
a list of control parameters to be passed to \code{\link{optim}}.
}
\item{hessian}{
logical. Should a numerically differentiated Hessian matrix be returned?
}
}
\details{
In genome-wide association studies (GWAS), random effects are usually added to a model to account for polygenic variation. Abney et al (2000) showed that five variance components including the most interesting additive and dominance variance components are potentially induced by polygenes. The above function is intended for selecting variance components that contribute "most" to a quantitative trait.
Function \code{\link{estVC}} is called by the above function to estimate the parameters and maximum likelihood in each model. Refer to \code{\link{estVC}} for more information.
}
\value{
\item{aic}{AIC of the final model.}
\item{model}{gives parameter estimates, log-likihood, and other information.}
\item{lik}{log-likelihood of the model selected at each intermediate step.}
\item{trace}{indicates which variance components were selected at each intermediate step.}
}
\references{
Abney, M., M. S. McPeek, and C. Ober (2000). Estimation of variance components of quantitative traits in inbred populations. Am. J. Hum. Genet. 141, 629-650.
}
\seealso{
\code{\link{estVC}} for more information.
}
\examples{
data(miscEx)
\dontrun{
# forward selection
# any variance component will be selected
# if AIC improve by 1e-5 or larger
v=list(AA=gmF8$AA, DD=gmF8$DD, HH=NULL, AD=NULL,
MH=NULL, EE=diag(length(pdatF8$bwt)))
o<- aicVC(y=pdatF8$bwt, x=pdatF8$sex, k=0, v=v, verbose=TRUE)
o
# forward selection
of<- aicVC(y=pdatF8$bwt, x=pdatF8$sex, v=v, k=1/2, init=6,
direction="for", verbose=TRUE)
of
# backward elimination
ob<- aicVC(y=pdatF8$bwt, x=pdatF8$sex, v=v, k=1/2, keep=6,
direction="back", verbose=TRUE)
ob
}
}
36 changes: 36 additions & 0 deletions man/blup.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
\name{blup}
\alias{blup}
\title{
Best Linear Unbiased Prediction
}
\description{
Estimate the best linear unbiased prediction (BLUP) for various effects in the model.
}
\usage{
blup(object)
}
\arguments{
\item{object}{
an object from \code{\link{estVC}} or \code{\link{aicVC}}.
}
}
\value{
\item{fixed}{BLUP for fixed effects.}
\item{AA,DD,\dots}{BLUP for random (genetic) variance components "AA", "DD", \dots}
\item{EE}{BLUP for residual effect.}
}

\seealso{
\code{\link{estVC}} and \code{\link{aicVC}}.
}
\examples{
data(miscEx)

\dontrun{
# only consider additive genetic variance component
o<- estVC(y=pdatF8$bwt, x=pdatF8$sex, v=list(AA=gmF8$AA,DD=gmF8$DD,
HH=NULL, AD=NULL, MH=NULL, EE=diag(length(pdatF8$bwt))))
b<- blup(o)
}
}

66 changes: 66 additions & 0 deletions man/cic.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
\name{cic}
\alias{cic}
\title{
Calculate Jacquard condensed identity coefficients
}
\description{
Calculate Jacquard condensed identity coefficients from a pedigree.
}
\usage{
cic(ped,ids,inter,df=3,ask=FALSE,verbose=FALSE)
}
\arguments{
\item{ped}{
a pedigree, which is a data frame (id, sire, dam, ...). If given, "generation" can be numeric 0, 1, 2, ... or non-numeric "F0", "F1", "F2", ... If "sex" is included, male should be "M", "Male" or 1, and female should be "F", "Female" or 2 (other than 0 and 1). Note that 0 is reserved for missing values.
}
\item{ids}{
IDs of the individuals for which to calculate the Jacquard condensed identity coefficients. If missing, all individuals in the pedigree \code{ped} will be considered.
}
\item{inter}{
if given, it should indicate intermediate generations.
}
\item{df}{
if \code{inter} is missing, \code{df} is used to derive (optimal) \code{inter}. If \code{df = 0}, then there will no intermediate generations. If \code{df} is large, then all generations will be used as intermediate generations.
}
\item{ask}{
if true, users will be asked whether to proceed.
}
\item{verbose}{
if true, will print out some messages.
}
}
\details{
The coefficients will be calculated for individuals with IDs specified by \code{ids}. All individuals will be considered if \code{ids} is missing. This is not recommended if the total number of individuals in the pedigree is large. Instead, it is recommended that \code{ids} is specified for interested individuals only

\code{df} is a tuning parameter. It should not be 0 (or smaller than 1) if the pedigree is large in depth (many generations) but the number of individuals is not small; otherwise, it can take forever to finish. It should not be \code{Inf} (or a large number) if the number of individuals in certain intermediate generation is very large.
}
\value{
A matrix G with G[,j] being the j-th Jacquard identity coefficients.
}
\references{
Abney, M., M. S. McPeek, and C. Ober (2000). Estimation of variance components of quantitative traits in inbred populations. Am. J. Hum. Genet. 141, 629-650.
}
\note{
You may need the administrative privilege to run this function on systems such as Windows 7. It may require your operating system support "long long" integer type in C++. If you run this function in a windows system, make sure the working directory is under system volume C and you have the write privilege.

It is better to remove the working directory if the program is interrupted by external forces (e.g. killed by users).

Warning: you may need to run this program on a 64-bit machine in case of seeing such a message!
}
\examples{
data(miscEx)

ids<- sample(pedF8$id[300:500],20)

\dontrun{
# run 'cic' for the sampled individuals
# top-down
oo<- cic(pedF8, ids=ids, df=Inf, verbose=TRUE)
# bottom-up
o0<- cic(pedF8, ids=ids, df=0, verbose=TRUE)
# hybrid of top-down and bottom-up
o2<- cic(pedF8, ids=ids, ask=TRUE, verbose=TRUE)
# same results
c(sum(abs(oo-o0) >1e-7),sum(abs(o2-o0) >1e-7))
}
}
80 changes: 80 additions & 0 deletions man/estVC.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
\name{estVC}
\alias{estVC}
\title{
Estimate Variance Component Parameters
}
\description{
Estimate model parameters for covariates, genetic variance components and residual effect.
}
\usage{
estVC(y,x,v=vector("list",6),initpar,nit=25,
method=c("Nelder-Mead","BFGS","CG","SANN"),
control=list(),hessian=FALSE)
}
\arguments{
\item{y}{
a numeric vector or a numeric matrix of one column (representing a phenotype for instance).
}
\item{x}{
a data frame or matrix, representing covariates if not missing.
}
\item{v}{
a list of variance components (AA, DD, HH, AD, MH, EE,...), where "AA" and "DD" are respectively additive and dominance genetic matrices, "HH", "AD" and "MH" are other genetic matrices that one may be interested in (see \code{\link{aicVC}}), "EE" is the residual matrix that is usually assumed to be an identity matrix, and "..." are other random components of interest. If a genetic component is not considered, it should be set to NULL.
}
\item{initpar}{
optional initial parameter values.
}
\item{nit}{
number of iterations to call \code{\link{optim}} for optimization.
}
\item{method}{
the optimization method to be used. See \code{\link{optim}} for details.
}
\item{control}{
a list of control parameters to be passed to \code{\link{optim}}.
}
\item{hessian}{
logical. Should a numerically differentiated Hessian matrix be returned?
}
}
\details{
The optimization function \code{\link{optim}} is adopted in the above function to estimate the parameters and maximum likelihood. Several optimization methods are available for the optimization algorithm in \code{\link{optim}}, but we recommend "Nelder-Mead" for the sake of stability. Alternatively, one may choose other options, e.g., "BFGS" to initialize and speed up the estimation procedure and then the procedure will automatically turn to "Nelder-Mead" for final results.

Normality is assumed for the random effects. Input data should be free of missing values.
}
\value{
\item{par}{estimates of the model parameters.}
\item{value}{log-likelihood of the model.}
\item{y}{y used.}
\item{x}{associated with x used.}
\item{v}{variance component matrices v used.}
\item{\dots}{other information.}
}

\seealso{
\code{\link{optim}} and \code{\link{rem}}.
}
\examples{
data(miscEx)

\dontrun{
# no sex effect
o<- estVC(y=pdatF8$bwt, v=list(AA=gmF8$AA,DD=gmF8$DD,
HH=NULL, AD=NULL, MH=NULL, EE=diag(length(pdatF8$bwt))))
o

# sex as fixed effect
fo<- estVC(y=pdatF8$bwt, x=pdatF8$sex, v=list(AA=gmF8$AA,DD=gmF8$DD,
HH=NULL, AD=NULL, MH=NULL, EE=diag(length(pdatF8$bwt))))
fo
2*(fo$value-o$value) # log-likelihood test statistic

# sex as random effect
SM<- rem(~sex, data=pdatF8)
ro<- estVC(y=pdatF8$bwt, v=list(AA=gmF8$AA,DD=gmF8$DD,
HH=NULL, AD=NULL, MH=NULL, SE=SM$sex, EE=diag(length(pdatF8$bwt))))
ro
2*(ro$value-o$value) # log-likelihood test statistic
}
}

42 changes: 42 additions & 0 deletions man/genMatrix.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
\name{genMatrix}
\alias{genMatrix}
\title{
Derive genetic matrices
}
\description{
Derive genetic matrices from Jacquard condensed identity coefficients or genotypic data.
}
\usage{
genMatrix(x)
}
\arguments{
\item{x}{
an object of \code{\link{cic}} or \code{\link{ibs}}, or genotypic data in a matrix or a data frame with each row representing an observation and each column a marker locus and entry being "AA", "AB", "BB" (or 1, 2, 3) without missing genotypes.
}
}
\value{
\item{AA}{additive genetic matrix.}
\item{DD}{dominance genetic matrix.}
\item{AD,HH,MH}{other three genetic matrices (see Abney et. al. 2000).}
\item{ib}{inbreeding coefficients.}
}
\references{
Abney, M., M. S. McPeek, and C. Ober (2000). Estimation of variance components of quantitative traits in inbred populations. Am. J. Hum. Genet. 141, 629-650.
}
\seealso{
\code{\link{cic}}
}
\examples{
data(miscEx)

ids<- sample(pedF8$id[300:500],20)

\dontrun{
# get condensed identity coefficients
oo<- cic(pedF8, ids=ids, df=0)
ksp<- kinship(pedF8, ids=ids) # kinship coefficients only
# extract genetic matrices
gm<- genMatrix(oo)
sum((gm$AA-2*ksp)>1e-7) # same results
}
}
Loading

0 comments on commit 9b09bea

Please sign in to comment.