In [None]:
source('simulation_library.R')
library(ggplot2)
# Fix the random seed before any simulation is run.
set.seed(123)

# Sample sizes (total number of observations) to simulate.
sample.size.whole.vector <- c(30, 100, 500)

# Positive sample rates, i.e. the fraction of observations with the SNV.
snv.yes.rate.vector <- c(0.2, 0.5, 0.8)

# Zero rates for the SNV.
zero.snv.rate.vector <- c(0.2, 0.5, 0.8)

# SNV read-count level.
snv.gene.mu <- 100

# Zero rates for the gene.
zero.gene.rate.vector <- c(0.2, 0.5, 0.8)

# Gene expression level scenarios; each entry is
# c(level for positive observations, level for negative observations).
gene.mu.vector <- list(
    c(300, 300),
    c(30, 300),
    c(300, 30)
)



### start simulation  ###

# Shared ggplot2 theme applied to every histogram: centred bold title,
# steel-blue legend text, black axis titles and axis labels.
mynamestheme <- theme(
    plot.title = element_text(
        family = "Helvetica", face = "bold", size = 20, hjust = 0.5
    ),
    legend.title = element_text(
        colour = "steelblue", face = "bold.italic", family = "Helvetica"
    ),
    legend.text = element_text(
        face = "italic", colour = "steelblue4", family = "Helvetica"
    ),
    axis.title = element_text(
        family = "Helvetica", size = 16, colour = "black"
    ),
    axis.text = element_text(
        family = "Courier", colour = "black", size = 12
    )
)

# simulation
#
# For every combination of sample size, positive (SNV) rate, SNV zero rate,
# gene zero rate and gene expression scenario: simulate `times` data sets,
# compute one p-value per data set, and save a histogram of those p-values
# as a PDF inside `output.dir`. The path of each written PDF is reported
# with message().
times <- 1000

# Make sure the output directory exists before the first ggsave() call.
output.dir <- 'images'
dir.create(output.dir, showWarnings = FALSE, recursive = TRUE)

for (sample.size.whole in sample.size.whole.vector){
    for (snv.yes.rate in snv.yes.rate.vector){
        # The group sizes depend only on the two outer loop variables, so
        # they are computed once here rather than in every replicate.
        sample.issnv.size <- ceiling(sample.size.whole * snv.yes.rate)
        sample.notsnv.size <- sample.size.whole - sample.issnv.size

        for (zero.snv.rate in zero.snv.rate.vector){
            for (zero.gene.rate in zero.gene.rate.vector){
                for (gene.mu in gene.mu.vector){
                    gene.with.snv.mu <- gene.mu[1]
                    gene.without.snv.mu <- gene.mu[2]

                    # p-values are doubles, so preallocate a numeric vector.
                    pvalue <- numeric(times)
                    for (rep.idx in seq_len(times)){
                        simulation.data <- related.pair.simulation(
                            sample.issnv.size, sample.notsnv.size,
                            zero.snv.rate, zero.gene.rate, snv.gene.mu,
                            gene.with.snv.mu, gene.without.snv.mu)
                        pvalue[rep.idx] <- calculate.pvalue(simulation.data)
                        #if(rep.idx%%200 == 0){message(rep.idx)}
                    }
                    result <- data.frame(pvalue)

                    #------------  for generating pvalue histogram ---------------
                    #
                    # file naming scheme
                    # ss: sample size
                    # pr: positive rate (rate of snv==1)
                    # zs: snv zero rate
                    # ms: snv read counts
                    # zg: gene zero rate
                    # mgp: gene (positive) read counts level
                    # mgn: gene (negative) read counts level
                    pdf.title.ss <- paste('ss', sample.size.whole, sep = "_")
                    pdf.title.pr <- paste('pr', snv.yes.rate, sep = "_")
                    pdf.title.zs <- paste('zs', zero.snv.rate, sep = "_")
                    pdf.title.ms <- paste('ms', snv.gene.mu, sep = "_")
                    pdf.title.zg <- paste('zg', zero.gene.rate, sep = "_")
                    pdf.title.mgp <- paste('mgp', gene.with.snv.mu, sep = "_")
                    pdf.title.mgn <- paste('mgn', gene.without.snv.mu, sep = "_")
                    pdf.title <- paste0(
                        paste('PvalueHistogram', pdf.title.ss, pdf.title.pr,
                              pdf.title.zs, pdf.title.ms, pdf.title.zg,
                              pdf.title.mgp, pdf.title.mgn, sep = "__"),
                        ".pdf")
                    pdf.path <- file.path(output.dir, pdf.title)

                    # plot histogram
                    # `breaks` fixes the bin edges at 0, 0.05, ..., 1. A
                    # `bins` argument would be overridden by `breaks`, so it
                    # is not passed. The y aesthetic rescales counts to a
                    # fraction of all replicates.
                    # NOTE: `..count..` is deprecated in ggplot2 >= 3.4 in
                    # favour of after_stat(count); kept for compatibility
                    # with older ggplot2 installations.
                    histplot <- ggplot(data = result) +
                        geom_histogram(aes(x = pvalue,
                                           y = (..count..)/sum(..count..)),
                                       breaks = seq(0, 1, by = 0.05),
                                       fill = 'white', color = 'black') +
                        scale_y_continuous(labels = scales::percent) +
                        ylab('Percent') +
                        theme_classic() +
                        ggtitle('Histgram for pvalue') + mynamestheme

                    # ggsave() announces the image size via message();
                    # silence that and report only the output path.
                    suppressMessages(ggsave(pdf.path, plot = histplot))
                    message(pdf.path)
                }
            }
        }
    }
}
  


Classes and Methods for R developed in the
Political Science Computational Laboratory
Department of Political Science
Stanford University
Simon Jackman
hurdle and zeroinfl functions by Achim Zeileis

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.2__mgp_300__mgn_300.pdf

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.2__mgp_30__mgn_300.pdf

