# In [179]:
source('simulation_library.R')
library(ggplot2)
# Fix the RNG seed so repeated runs draw the same random numbers.
set.seed(123)

# ---- Simulation parameters ------------------------------------------------
# Each "*.vector" below is iterated over by the simulation loops, so several
# values may be listed to sweep a parameter.

# Total number of observations per simulated data set.
sample.size.whole.vector <- 500
# Rate of positive samples (observations that carry the SNV).
snv.yes.rate.vector <- 0.3
# Zero rate for the SNV.
zero.snv.rate.vector <- 0.3
# SNV read-count level.
snv.gene.mu <- 100
# Zero rate for the gene.
zero.gene.rate.vector <- 0.3
# Gene expression levels; each list entry is a pair
# c(level for positive observations, level for negative observations).
gene.mu.vector <- list(c(300, 30))



### start simulation  ###

# Reusable ggplot2 theme: bold, centred Helvetica plot title; steel-blue
# Helvetica legend title and text; black Helvetica axis titles; black Courier
# axis tick labels.
mynamestheme <- theme(
    plot.title = element_text(
        family = "Helvetica",
        face = "bold",
        size = 20,
        hjust = 0.5
    ),
    legend.title = element_text(
        colour = "steelblue",
        face = "bold.italic",
        family = "Helvetica"
    ),
    legend.text = element_text(
        face = "italic",
        colour = "steelblue4",
        family = "Helvetica"
    ),
    axis.title = element_text(
        family = "Helvetica",
        size = 16,
        colour = "black"
    ),
    axis.text = element_text(
        family = "Courier",
        colour = "black",
        size = 12
    )
)

# ---- Simulation settings ----------------------------------------------------
# Number of simulated data sets per parameter combination.
times <- 1000
# Significance levels at which rejection rates are reported.
p.threshold.1 <- 0.01
p.threshold.5 <- 0.05

# Empty result table. One row is appended per parameter combination: the
# first seven columns record the parameters, the last four the rejection
# rates of the two tests at the 5% and 1% levels.
result.df <- data.frame(
    sample.size.whole            = integer(0),
    snv.yes.rate                 = numeric(0),
    zero.snv.rate                = numeric(0),
    snv.gene.mu                  = numeric(0),
    zero.gene.rate               = numeric(0),
    gene.with.snv.mu             = numeric(0),
    gene.without.snv.mu          = numeric(0),
    pvalue.lm.rejection.rate.5   = numeric(0),
    pvalue.zinb.rejection.rate.5 = numeric(0),
    pvalue.lm.rejection.rate.1   = numeric(0),
    pvalue.zinb.rejection.rate.1 = numeric(0)
)
# Fraction of the non-missing p-values that are strictly below `alpha`.
# Missing p-values are excluded from both the numerator and the denominator,
# so the result is NaN when every p-value is missing.
compute.rejection.rate <- function(pvalues, alpha) {
    sum(pvalues < alpha, na.rm = TRUE) / sum(!is.na(pvalues))
}

# Sweep every combination of the configured parameter vectors. For each
# combination, `times` data sets are simulated, both p-values are collected
# per data set, and one row of rejection rates is appended to `result.df`.
# NOTE(review): related.pair.simulation() and calculate.loglinear.zinb.pvalue()
# are not defined in this script -- presumably they come from
# simulation_library.R; confirm their contracts there.
for (sample.size.whole in sample.size.whole.vector) {
    for (snv.yes.rate in snv.yes.rate.vector) {
        # Group sizes depend only on the two outer loop variables, so they
        # are computed once here instead of once per replicate.
        sample.issnv.size <- ceiling(sample.size.whole * snv.yes.rate)
        sample.notsnv.size <- sample.size.whole - sample.issnv.size

        for (zero.snv.rate in zero.snv.rate.vector) {
            for (zero.gene.rate in zero.gene.rate.vector) {
                for (gene.mu in gene.mu.vector) {
                    gene.with.snv.mu <- gene.mu[1]
                    gene.without.snv.mu <- gene.mu[2]

                    # Preallocate as missing doubles: p-values are real
                    # numbers, and an unfilled slot must not read as 0.
                    pvalue.lm <- rep(NA_real_, times)
                    pvalue.zinb <- rep(NA_real_, times)

                    # seq_len() yields an empty sequence when times == 0,
                    # whereas seq(1:times) would iterate twice.
                    for (t in seq_len(times)) {
                        simulation.data <- related.pair.simulation(
                            sample.issnv.size, sample.notsnv.size,
                            zero.snv.rate, zero.gene.rate, snv.gene.mu,
                            gene.with.snv.mu, gene.without.snv.mu
                        )
                        pvalue.current <-
                            calculate.loglinear.zinb.pvalue(simulation.data)
                        pvalue.lm[t] <- pvalue.current['pvalue.lm']
                        pvalue.zinb[t] <- pvalue.current['pvalue.zinb']
                        #if(t%%100 == 0){message(t)}
                    }

                    pvalue.lm.rejection.rate.5 <-
                        compute.rejection.rate(pvalue.lm, p.threshold.5)
                    pvalue.zinb.rejection.rate.5 <-
                        compute.rejection.rate(pvalue.zinb, p.threshold.5)
                    pvalue.lm.rejection.rate.1 <-
                        compute.rejection.rate(pvalue.lm, p.threshold.1)
                    pvalue.zinb.rejection.rate.1 <-
                        compute.rejection.rate(pvalue.zinb, p.threshold.1)

                    # Append one row; the values are matched to the columns
                    # of result.df by position.
                    result <- list(
                        sample.size.whole, snv.yes.rate, zero.snv.rate,
                        snv.gene.mu, zero.gene.rate, gene.with.snv.mu,
                        gene.without.snv.mu, pvalue.lm.rejection.rate.5,
                        pvalue.zinb.rejection.rate.5,
                        pvalue.lm.rejection.rate.1,
                        pvalue.zinb.rejection.rate.1
                    )
                    result.df[nrow(result.df) + 1, ] <- result

                    # ---- title describing this parameter combination ----
                    # Naming scheme:
                    #   ss:  sample size
                    #   pr:  positive rate (rate of snv == 1)
                    #   zs:  snv zero rate
                    #   ms:  snv read counts
                    #   zg:  gene zero rate
                    #   mgp: gene (positive) read-count level
                    #   mgn: gene (negative) read-count level
                    title.ss <- paste('ss', sample.size.whole, sep = "_")
                    title.pr <- paste('pr', snv.yes.rate, sep = "_")
                    title.zs <- paste('zs', zero.snv.rate, sep = "_")
                    title.ms <- paste('ms', snv.gene.mu, sep = "_")
                    title.zg <- paste('zg', zero.gene.rate, sep = "_")
                    title.mgp <- paste('mgp', gene.with.snv.mu, sep = "_")
                    title.mgn <- paste('mgn', gene.without.snv.mu, sep = "_")
                    title <- paste(title.ss, title.pr, title.zs,
                                   title.ms, title.zg, title.mgp,
                                   title.mgn, sep = "__")
                    # Progress report: one line per finished combination.
                    message(title)
                }
            }
        }
    }
}
# Persist the accumulated result table in two formats: an .Rda workspace file
# (via save) and an .Rds single-object file (via saveRDS).
# NOTE(review): the file names use title.mgp / title.mgn as left behind by the
# simulation loop, i.e. only the last gene-level pair processed -- confirm
# this is intended when several parameter combinations are swept.
object.name.Rda <- paste(
    c(title.mgp, title.mgn, "simulation.Rda"),
    collapse = "_"
)
object.name.Rds <- paste(
    c(title.mgp, title.mgn, "simulation.Rds"),
    collapse = "_"
)
save(result.df, file = object.name.Rda)
saveRDS(result.df, file = object.name.Rds)


# Output: ss_500__pr_0.3__zs_0.3__ms_100__zg_0.3__mgp_300__mgn_30

