Skip to content

Commit

Permalink
Merge pull request #7 from smgogarten/allow_fork
Browse files Browse the repository at this point in the history
Allow fork
  • Loading branch information
smgogarten authored Feb 7, 2020
2 parents d8c1417 + 1b8cafa commit 8cdf2c5
Show file tree
Hide file tree
Showing 18 changed files with 122 additions and 61 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Package: GWASTools
Version: 1.33.1
Version: 1.33.2
Type: Package
Title: Tools for Genome Wide Association Studies
Description: Classes for storing very large GWAS data sets and annotation, and functions for GWAS data cleaning and analysis.
Author: Stephanie M. Gogarten, Cathy Laurie, Tushar Bhangale, Matthew P. Conomos, Cecelia Laurie, Michael Lawrence, Caitlin McHugh, Ian Painter, Xiuwen Zheng, Jess Shen, Rohit Swarnkar, Adrienne Stilp, Sarah Nelson
Maintainer: Stephanie M. Gogarten <sdmorris@uw.edu>
Depends: Biobase
Imports: graphics, stats, utils, methods, gdsfmt, DBI, RSQLite, GWASExactHW, DNAcopy, survival, sandwich, lmtest, logistf, quantsmooth
Suggests: ncdf4, GWASdata, BiocGenerics, RUnit, Biostrings, GenomicRanges, IRanges, SNPRelate, snpStats, S4Vectors, VariantAnnotation
Imports: graphics, stats, utils, methods, gdsfmt, DBI, RSQLite, GWASExactHW, DNAcopy, survival, sandwich, lmtest, logistf, quantsmooth, dplyr
Suggests: ncdf4, GWASdata, BiocGenerics, RUnit, Biostrings, GenomicRanges, IRanges, SNPRelate, snpStats, S4Vectors, VariantAnnotation, parallel
License: Artistic-2.0
URL: https://github.com/smgogarten/GWASTools
LazyData: yes
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ importFrom(sandwich, vcovHC, vcovHC.default) # used in assocRegression
importFrom(lmtest, lrtest, lrtest.default) # used in assocRegression
importFrom(logistf, logistf) # used in assocRegression
importFrom(quantsmooth, paintCytobands, lengthChromosome) # used to plot ideograms
importFrom(dplyr, bind_rows)

exportClasses(
SnpAnnotationDataFrame,
Expand Down
8 changes: 4 additions & 4 deletions R/LOHselectAnoms.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ for(j in 1:n1) {
right<-max(segs$right[ind])
tmp<-data.frame(left,right)
names(tmp)<-c("left","right")
an.comb<-rbind(an.comb,tmp) }
an.comb<-bind_rows(an.comb,tmp) }
ws<-setdiff(w,mod.ind)
an.nomod<-segs[ws,c("left","right")]
tmp2<-rbind(an.nomod,an.comb)
tmp2<-bind_rows(an.nomod,an.comb)
flag<-1;out<-list(flag,tmp2)
names(out)<-c("flag","anoms")
return(out) } #end function mmerge
Expand Down Expand Up @@ -67,7 +67,7 @@ tmpdown<-NULL
flag<-FALSE
if(length(wup)!=0) { if(resup$flag==1) flag<-TRUE}
if(length(wdown)!=0) { if (resdown$flag==1) flag<-TRUE}
out<-rbind(rest,tmpup,tmpdown)
out<-bind_rows(rest,tmpup,tmpdown)
out<-out[order(out$left),]
rout<-list(out,flag)
names(rout)<-c("newsegs","flag")
Expand Down Expand Up @@ -144,7 +144,7 @@ for(i in 1:dim(RUNS)[1]) {
if(over<=0)next #no overlap
tp<-data.frame(mxL,mnR)
names(tp)<-c("left","right")
runsegs<-rbind(runsegs,tp)
runsegs<-bind_rows(runsegs,tp)
}
}

Expand Down
4 changes: 2 additions & 2 deletions R/Methods-GdsGenotypeReader.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Methods for GdsGenotypeReader

GdsGenotypeReader <- function(filename, genotypeDim, genotypeVar, snpIDvar, scanIDvar, ...) {
GdsGenotypeReader <- function(filename, genotypeDim, genotypeVar, snpIDvar, scanIDvar, allow.fork=FALSE, ...) {
if (missing(filename)) stop("filename is required")
if (missing(genotypeVar)) genotypeVar <- "genotype"
if (missing(snpIDvar)) snpIDvar <- "snp.id"
Expand All @@ -9,7 +9,7 @@ GdsGenotypeReader <- function(filename, genotypeDim, genotypeVar, snpIDvar, scan
# GdsReader does not have ... in its argument
#tmpobj <- new("GdsReader", GdsReader(filename))
input.gds <- is(filename, 'gds.class')
tmpobj <- GdsReader(filename)
tmpobj <- GdsReader(filename, allow.fork=allow.fork)

# automatic checking for genotypeDim:
snpDim <- getDimension(tmpobj, snpIDvar)
Expand Down
4 changes: 2 additions & 2 deletions R/Methods-GdsIntensityReader.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Methods for GdsIntensityReader

GdsIntensityReader <- function(filename, ...) {
GdsIntensityReader <- function(filename, allow.fork=FALSE, ...) {
if (missing(filename)) stop("filename is required")

input.gds <- is(filename, 'gds.class')
tmpobj <- GdsReader(filename)
tmpobj <- GdsReader(filename, allow.fork=allow.fork)

tryCatch(new("GdsIntensityReader", tmpobj, ...),
error=function(e) {if (!input.gds) close(tmpobj)
Expand Down
8 changes: 4 additions & 4 deletions R/Methods-GdsReader.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Methods for GdsReader

# constructor
GdsReader <- function(filename) {
GdsReader <- function(filename, allow.fork=FALSE) {
if (missing(filename)) stop("filename is required")
if (is(filename, 'gds.class')) {
input.gds <- TRUE
Expand All @@ -10,7 +10,7 @@ GdsReader <- function(filename) {
} else {
input.gds <- FALSE
if (!file.exists(filename)) stop("Error in opening file ", filename, ": no such file or directory")
handler <- openfn.gds(filename)
handler <- openfn.gds(filename=filename, allow.fork=allow.fork)
}
new("GdsReader", filename=filename, handler=handler)
}
Expand All @@ -27,8 +27,8 @@ setValidity("GdsReader",

setMethod("open",
signature(con = "GdsReader"),
function (con) {
con@handler <- openfn.gds(con@filename)
function (con, ...) {
con@handler <- openfn.gds(con@filename, ...)
})

setMethod("close",
Expand Down
16 changes: 8 additions & 8 deletions R/anomDetectLOH.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ anomDetectLOH<-function(intenData, genoData, scan.ids, chrom.ids, snp.ids,
RUNS.snch<-out$RUNS
segs.snch<-out$segments

LOH.base.info<-rbind(LOH.base.info,base.snch)
LOH.segments<-rbind(LOH.segments,segs.snch)
LOH.raw<-rbind(LOH.raw,RUNS.snch)
LOH.base.info<-bind_rows(LOH.base.info,base.snch)
LOH.segments<-bind_rows(LOH.segments,segs.snch)
LOH.raw<-bind_rows(LOH.raw,RUNS.snch)

#### BEGIN filtering process #############

Expand Down Expand Up @@ -161,8 +161,8 @@ anomDetectLOH<-function(intenData, genoData, scan.ids, chrom.ids, snp.ids,
RUNS.snch$chrom.nonanom.mean<-NA
RUNS.snch$chrom.nonanom.sd<-NA
RUNS.snch$sex<-sex[sindex]
LOH.raw.adjusted<-rbind(LOH.raw.adjusted,RUNS.snch)
LOH.filtered<-rbind(LOH.filtered,RUNS.snch)
LOH.raw.adjusted<-bind_rows(LOH.raw.adjusted,RUNS.snch)
LOH.filtered<-bind_rows(LOH.filtered,RUNS.snch)
next
}

Expand Down Expand Up @@ -200,11 +200,11 @@ anomDetectLOH<-function(intenData, genoData, scan.ids, chrom.ids, snp.ids,
if(!is.null(raw.adj)) raw.adj$sex<-sex[sindex]
filtered<-outt$filtered
if(!is.null(filtered)) filtered$sex<-sex[sindex]
LOH.raw.adjusted<-rbind(LOH.raw.adjusted,raw.adj)
LOH.filtered<-rbind(LOH.filtered, filtered)
LOH.raw.adjusted<-bind_rows(LOH.raw.adjusted,raw.adj)
LOH.filtered<-bind_rows(LOH.filtered, filtered)
if(outt$merge.flag){
tmp<-data.frame(snum,ch);names(tmp)<-c("scanID","chrom")
LOH.merge<-rbind(LOH.merge,tmp)
LOH.merge<-bind_rows(LOH.merge,tmp)
}

} #end chrom loop
Expand Down
42 changes: 21 additions & 21 deletions R/anomFilterBAF.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ frac.used<-an$num.mark/(an$right-an$left+1)
new<-data.frame(snum,ch,new.left,new.right,new.num.mark,new.seg.mean,new.sdfac,sx,TRUE,stringsAsFactors=FALSE)
names(new)<-c("scanID","chrom","left","right","num.mark","seg.mean","sd.fac","sex","merge")

merged.anoms<-rbind(merged.anoms,new)
merged.anoms<-bind_rows(merged.anoms,new)

}
}
Expand Down Expand Up @@ -400,7 +400,7 @@ frac.used<-an$num.mark/(an$right-an$left+1)
new<-data.frame(snum,ch,new.left,new.right,new.num.mark,new.seg.mean,new.sdfac,sx,TRUE,stringsAsFactors=FALSE)
names(new)<-c("scanID","chrom","left","right","num.mark","seg.mean","sd.fac","sex","merge")

merged.anoms<-rbind(merged.anoms,new)
merged.anoms<-bind_rows(merged.anoms,new)

}
}
Expand All @@ -410,7 +410,7 @@ frac.used<-an$num.mark/(an$right-an$left+1)
if(length(del.merge)!=0){
tmp<-an[-del.merge,names(merged.anoms)]

out<-rbind(tmp,merged.anoms)
out<-bind_rows(tmp,merged.anoms)
out<-out[order(out$left),]
} else out<-an

Expand Down Expand Up @@ -470,7 +470,7 @@ for(I in 1:dim(anoms)[1]){
w<-frac>frac.thresh | pct<low.frac.used
if(ct< ct.thresh |!w){
an$old.left<-an$left; an$old.right<-an$right
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
next
}
who<-!whm
Expand All @@ -486,7 +486,7 @@ for(I in 1:dim(anoms)[1]){
rlen<-length(r0)
if(rlen==1){ # keep original if only one value
an$old.left<-an$left; an$old.right<-an$right
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
next
}

Expand Down Expand Up @@ -533,14 +533,14 @@ nvals<-new.rle[[2]]
nlens<-new.rle[[1]] ##indicates how many of original runs to put together
if(length(nvals)==1){ #all now classified as undesirable or as desirable = no change
an$old.left<-an$left; an$old.right<-an$right
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
next
}

newt<-which(nvals==0) #newt could be empty if originally there were no long het/miss runs
if(length(newt)==0){
an$old.left<-an$left; an$old.right<-an$right
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
next
}

Expand All @@ -559,15 +559,15 @@ right<-c(right,in.pos[ind+1+nlens[k]]-1)} else {right<-c(right,length(index))}}
## if splits into more than one run, leave as the original
if(length(right)==0|length(left)==0|length(right)>1|length(left)>1){
an$old.left<-an$left; an$old.right<-an$right
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
next
}

## there is one adjusted interval found

an$old.left<-an$left;an$old.right<-an$right
an$left<-index[left];an$right<-index[right]
anoms.rev<-rbind(anoms.rev,an)
anoms.rev<-bind_rows(anoms.rev,an)
} #end loop on anomalies
return(anoms.rev)
}
Expand Down Expand Up @@ -774,13 +774,13 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
normi<-data.frame(snum,base.mean,base.sd,braw.base.med,chr.ct)
names(normi)<-c("scanID","base.mean","base.sd","base.baf.med","chr.ct")

normal.info<-rbind(normal.info,normi)
normal.info<-bind_rows(normal.info,normi)


an$sd.fac<-sd.fac
an$sex<-sex[sindex]

anoms2<-rbind(anoms2,an)
anoms2<-bind_rows(anoms2,an)

##an is segment/sd.fac info for the given sample
## base.mean and base.sd are for the given sample
Expand All @@ -806,7 +806,7 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
tp<-data.frame(snum,ch,nsegs)
names(tp)<-c("scanID","chrom","num.segs")

an.seg.info<-rbind(an.seg.info,tp)
an.seg.info<-bind_rows(an.seg.info,tp)

## cent-span code insert here
centL<-centromere$left.base[centromere$chrom==ch]
Expand Down Expand Up @@ -874,8 +874,8 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
t2<-(s2|s3) & s1

if(!t1 & !t2) anch<-anrest #delete both failed
if(t1 & !t2) anch<-rbind(anrest,tmp[,names(anch)]) # delete one fail, keep other OK
if(!t1 & t2) anch<-rbind(anrest,tmp2[,names(anch)])
if(t1 & !t2) anch<-bind_rows(anrest,tmp[,names(anch)]) # delete one fail, keep other OK
if(!t1 & t2) anch<-bind_rows(anrest,tmp2[,names(anch)])
if(t1&t2){ # check same type and similar baf.dev (width)

# gain, loss, neutral
Expand All @@ -902,7 +902,7 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
q2<-relerr<=dev.sim.thresh

if(!q | !q2) { # keep each piece separate
anch<-rbind(anrest,tmp[,names(anch)],tmp2[,names(anch)])
anch<-bind_rows(anrest,tmp[,names(anch)],tmp2[,names(anch)])
}
# note at this stage, anch hasn't changed
}
Expand Down Expand Up @@ -960,13 +960,13 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
filin<-union(s,d.keep)
fil<-tst.rev[filin,]

an3.fil<-rbind(an3.fil,fil) #becomes filtered anoms for current sample
an3.fil<-bind_rows(an3.fil,fil) #becomes filtered anoms for current sample
}#end ch loop


anoms.fil<-rbind(anoms.fil,an3.fil)
anoms.fil<-bind_rows(anoms.fil,an3.fil)

seg.info<-rbind(seg.info,an.seg.info)
seg.info<-bind_rows(seg.info,an.seg.info)

} #end of sample loop
anoms2<-anoms2[order(anoms2$scanID,anoms2$chrom,anoms2$left),] #raw annotated
Expand All @@ -992,17 +992,17 @@ anomFilterBAF<-function(intenData, genoData, segments, snp.ids,
an.XY$homodel.adjust<-NA
an.XY$left.base<-getPosition(intenData, index=an.XY$left)
an.XY$right.base<-getPosition(intenData, index=an.XY$right)} else an.XY<-NULL
anoms.fil<-rbind(anoms.fil,an.XY)
anoms.fil<-bind_rows(anoms.fil,an.XY)
an.seg.info<-NULL
s<-unique(tmp$scanID)
for(snum in s){
an<-tmp[tmp$scanID==snum,]
ns<-dim(an)[1]
tt<-data.frame(snum,XYchromCode(intenData),ns)
names(tt)<-c("scanID","chrom","num.segs")
an.seg.info<-rbind(an.seg.info,tt)
an.seg.info<-bind_rows(an.seg.info,tt)
}
seg.info<-rbind(seg.info,an.seg.info)
seg.info<-bind_rows(seg.info,an.seg.info)
}


Expand Down
6 changes: 3 additions & 3 deletions R/anomIdentifyLowQuality.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ for(i in unique(snp.chrom)) {w<-snp.chrom==i& snp$eligible
m<-sum(w)
tmp<-data.frame(i,m)
names(tmp)<-c("chromosome","marker.length")
ch.marker.info<-rbind(ch.marker.info,tmp)
ch.marker.info<-bind_rows(ch.marker.info,tmp)
}
#############
del<-seg.info$chromosome!=XYchromCode(snp)
Expand Down Expand Up @@ -56,7 +56,7 @@ for(snum in smp){
tch<-length(which(nums$num.segs>1))
tmp<-data.frame(snum,nsX,mfX,mfach,mfauto,anseg,tch,fac.all,bfsd)
names(tmp)<-c("scanID","chrX.num.segs","chrX.fac","max.autosome","max.auto.fac","max.auto.num.segs","num.ch.segd","fac.all.auto","med.sd")
sampchr<-rbind(sampchr,tmp)
sampchr<-bind_rows(sampchr,tmp)
}
return(sampchr)
} #end function
Expand Down Expand Up @@ -89,7 +89,7 @@ if(sum(chbfseg)!=0){bad.auto<-samp.info[chbfseg,]
bad.auto$type<-"auto.seg"} else bad.auto<-NULL
if(sum(chbfboth)!=0) {bad.both<-samp.info[chbfboth,]
bad.both$type<-"both.sd.seg"} else bad.both<-NULL
bad<-rbind(bad.sd,bad.sng,bad.sngX,bad.auto,bad.both)
bad<-bind_rows(bad.sd,bad.sng,bad.sngX,bad.auto,bad.both)
if(!is.null(bad)) bad<-bad[order(bad$scanID),]
return(bad)
}# end function
Expand Down
2 changes: 1 addition & 1 deletion R/anomSegmentBAF.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ anomSegmentBAF<-function(intenData, genoData, scan.ids, chrom.ids, snp.ids,
#$loc.start and $loc.end are indices of snp/indices of intid

names(tmp)<-c("scanID","chromosome","left.index","right.index","num.mark","seg.mean")
anoms<-rbind(anoms,tmp)
anoms<-bind_rows(anoms,tmp)
} #end loop on samples


Expand Down
5 changes: 3 additions & 2 deletions R/gdsSubset.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ gdsSubset <- function(parent.gds,
sub.storage=NULL,
compress="LZMA_RA",
block.size=5000,
verbose=TRUE){
verbose=TRUE,
allow.fork=FALSE){

# this function only works for gds files having up to two dimensions, named "snp" and "sample"
gds <- openfn.gds(parent.gds)
gds <- openfn.gds(parent.gds, allow.fork=allow.fork)

# check that sample.include are all elements of sample.id
sampID <- read.gdsn(index.gdsn(gds, "sample.id"))
Expand Down
9 changes: 5 additions & 4 deletions R/gdsSubsetCheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ gdsSubsetCheck <- function(parent.gds,
sample.include=NULL,
snp.include=NULL,
sub.storage=NULL,
verbose=TRUE) {
verbose=TRUE,
allow.fork=FALSE) {

# this assumes that sample.id is the only 1D sample variable in the GDS

gds <- openfn.gds(parent.gds)
gds.sub <- openfn.gds(sub.gds)
gds <- openfn.gds(parent.gds, allow.fork=allow.fork)
gds.sub <- openfn.gds(sub.gds, allow.fork=allow.fork)

# check sampleID
sampID.parent <- read.gdsn(index.gdsn(gds, "sample.id"))
Expand Down Expand Up @@ -226,4 +227,4 @@ gdsSubsetCheck <- function(parent.gds,
closefn.gds(gds)
closefn.gds(gds.sub)
message("All variables match.")
}
}
Loading

0 comments on commit 8cdf2c5

Please sign in to comment.