In [130]:
# Project helpers (presumably where related.pair.simulation() and
# calculate.loglinear.zinb.pvalue() used below are defined -- confirm) and the
# plotting library.
source("simulation_library.R")
library(ggplot2)

# Fixed seed so repeated runs draw the same random numbers.
set.seed(123)

# Total sample sizes to simulate.
sample.size.whole.vector <- c(30, 100, 500, 1000)

# Fraction of positive samples (samples that carry the SNV).
snv.yes.rate.vector <- c(0.2, 0.5, 0.8)

# Zero rates for the SNV read counts.
zero.snv.rate.vector <- c(0.2, 0.5, 0.8)

# Read-count level for the SNV.
snv.gene.mu <- 100

# Zero rates for the gene read counts.
zero.gene.rate.vector <- c(0.2, 0.5, 0.8)

# Gene expression levels as (positive, negative) pairs: the first entry is
# used for observations with the SNV, the second for observations without it.
gene.mu.vector <- list(
  c(300, 300),
  c(30, 300),
  c(300, 30)
)
## Single-scenario alternative kept for quick runs:
## gene.mu.vector <- list(c(300, 30))



### start simulation  ###

# Shared ggplot2 theme for the p-value figures: bold Helvetica title with
# hjust = 0.5, steel-blue italic legend text, black Helvetica axis titles and
# black Courier axis tick labels.
mynamestheme <- theme(
  plot.title = element_text(
    family = "Helvetica",
    face = "bold",
    size = 20,
    hjust = 0.5
  ),
  legend.title = element_text(
    colour = "steelblue",
    face = "bold.italic",
    family = "Helvetica"
  ),
  legend.text = element_text(
    face = "italic",
    colour = "steelblue4",
    family = "Helvetica"
  ),
  axis.title = element_text(
    family = "Helvetica",
    size = 16,
    colour = "black"
  ),
  axis.text = element_text(
    family = "Courier",
    colour = "black",
    size = 12
  )
)

# ---- Simulation ----
# For every combination of sample size, SNV-positive rate, SNV zero rate, gene
# zero rate and gene expression pair, simulate `times` data sets, compute the
# log-linear ("lm") and ZINB p-values for each one, and record the fraction of
# non-missing p-values that fall below `p.threshold`.
#
# Results:
#   result.df   one row per scenario (parameters plus both rejection rates)
#   result.rows the same rows as a list; it is filled as the loops progress, so
#               completed scenarios remain available if the run is interrupted
times <- 100
p.threshold <- 0.05

# Empty template that fixes the column names and order of the summary table.
result.df <- data.frame(sample.size.whole = integer(),
                        snv.yes.rate = double(),
                        zero.snv.rate = double(),
                        snv.gene.mu = double(),
                        zero.gene.rate = double(),
                        gene.with.snv.mu = double(),
                        gene.without.snv.mu = double(),
                        pvalue.lm.rejection.rate = double(),
                        pvalue.zinb.rejection.rate = double())

# Collect one summary row per scenario in a preallocated list and bind once at
# the end, rather than growing the data frame inside the loop.
scenario.count <- length(sample.size.whole.vector) *
  length(snv.yes.rate.vector) *
  length(zero.snv.rate.vector) *
  length(zero.gene.rate.vector) *
  length(gene.mu.vector)
result.rows <- vector("list", scenario.count)
scenario.index <- 0L

for (sample.size.whole in sample.size.whole.vector) {
  for (snv.yes.rate in snv.yes.rate.vector) {
    # Group sizes depend only on the two outermost loop variables, so they are
    # computed here instead of once per replicate.
    sample.issnv.size <- ceiling(sample.size.whole * snv.yes.rate)
    sample.notsnv.size <- sample.size.whole - sample.issnv.size

    for (zero.snv.rate in zero.snv.rate.vector) {
      for (zero.gene.rate in zero.gene.rate.vector) {
        for (gene.mu in gene.mu.vector) {
          gene.with.snv.mu <- gene.mu[1]
          gene.without.snv.mu <- gene.mu[2]

          # p-values are doubles; start from NA so that a slot that was never
          # filled can not be mistaken for a p-value of 0 (a rejection).
          pvalue.lm <- rep(NA_real_, times)
          pvalue.zinb <- rep(NA_real_, times)

          for (t in seq_len(times)) {
            simulation.data <- related.pair.simulation(
              sample.issnv.size, sample.notsnv.size,
              zero.snv.rate, zero.gene.rate, snv.gene.mu,
              gene.with.snv.mu, gene.without.snv.mu
            )
            pvalue.current <- calculate.loglinear.zinb.pvalue(simulation.data)
            pvalue.lm[t] <- pvalue.current['pvalue.lm']
            pvalue.zinb[t] <- pvalue.current['pvalue.zinb']
            # Progress marker every 100 replicates.
            if (t %% 100 == 0) {
              message(t)
            }
          }

          # Rejection rate = share of non-missing p-values below the threshold.
          pvalue.lm.rejection.rate <-
            sum(pvalue.lm < p.threshold, na.rm = TRUE) / sum(!is.na(pvalue.lm))
          pvalue.zinb.rejection.rate <-
            sum(pvalue.zinb < p.threshold, na.rm = TRUE) /
            sum(!is.na(pvalue.zinb))

          result <- data.frame(
            sample.size.whole = sample.size.whole,
            snv.yes.rate = snv.yes.rate,
            zero.snv.rate = zero.snv.rate,
            snv.gene.mu = snv.gene.mu,
            zero.gene.rate = zero.gene.rate,
            gene.with.snv.mu = gene.with.snv.mu,
            gene.without.snv.mu = gene.without.snv.mu,
            pvalue.lm.rejection.rate = pvalue.lm.rejection.rate,
            pvalue.zinb.rejection.rate = pvalue.zinb.rejection.rate
          )
          scenario.index <- scenario.index + 1L
          result.rows[[scenario.index]] <- result

          #------------  for generating p-value histogram ---------------
          #
          # File name scheme
          # ss:  sample size
          # pr:  positive rate (rate of snv == 1)
          # zs:  snv zero rate
          # ms:  snv read counts
          # zg:  gene zero rate
          # mgp: gene (positive) read counts level
          # mgn: gene (negative) read counts level
          pdf.title.ss <- paste('ss', sample.size.whole, sep = "_")
          pdf.title.pr <- paste('pr', snv.yes.rate, sep = "_")
          pdf.title.zs <- paste('zs', zero.snv.rate, sep = "_")
          pdf.title.ms <- paste('ms', snv.gene.mu, sep = "_")
          pdf.title.zg <- paste('zg', zero.gene.rate, sep = "_")
          pdf.title.mgp <- paste('mgp', gene.with.snv.mu, sep = "_")
          pdf.title.mgn <- paste('mgn', gene.without.snv.mu, sep = "_")
          pdf.title <- paste0(
            paste('PvalueHistogram', pdf.title.ss, pdf.title.pr, pdf.title.zs,
                  pdf.title.ms, pdf.title.zg, pdf.title.mgp, pdf.title.mgn,
                  sep = "__"),
            ".pdf"
          )
          pdf.path <- file.path('images', pdf.title)

          # Histogram plotting is disabled.
          # NOTE(review): this block is stale -- it maps `x = pvalue`, but
          # `result` has no `pvalue` column. Decide which p-values to plot
          # (pvalue.lm / pvalue.zinb) before re-enabling it and the ggsave()
          # call below.
          if (FALSE) {
            histplot <- ggplot(data = result) +
              geom_histogram(aes(x = pvalue, y = (..count..) / sum(..count..)),
                             bins = 10, breaks = seq(0, 1, by = 0.05),
                             fill = 'white', color = 'black') +
              scale_y_continuous(labels = scales::percent) +
              ylab('Percent') +
              theme_classic() +
              ggtitle('Histgram for pvalue') + mynamestheme
          }
          #suppressMessages(ggsave(pdf.path, plot = histplot))
          message(pdf.path)
        }
      }
    }
  }
}

# Bind the collected rows onto the empty template. rbind() drops zero-row
# arguments and NULL entries, so this also works when no scenario (or only
# some scenarios) completed.
result.df <- do.call(rbind, c(list(result.df), result.rows))
  


100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.2__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.2__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.2__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.5__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.8__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.8__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.2__ms_100__zg_0.8__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.5__ms_100__zg_0.2__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_30__pr_0.2__zs_0.5__ms_100__zg_0.2__mgp_30__mgn_300.pdf

100

images/Pvalu

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.2__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.5__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.8__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.8__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.5__ms_100__zg_0.8__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.8__ms_100__zg_0.2__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.8__ms_100__zg_0.2__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.8__ms_100__zg_0.2__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_100__pr_0.2__zs_0.8__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

i

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.5__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.8__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.8__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.2__zs_0.8__ms_100__zg_0.8__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_500__pr_0.5__zs_0.2__ms_100__zg_0.2__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.5__zs_0.2__ms_100__zg_0.2__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.5__zs_0.2__ms_100__zg_0.2__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_500__pr_0.5__zs_0.2__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_500__pr_0.5__zs_0.2__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

i

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.2__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.2__ms_100__zg_0.5__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.2__ms_100__zg_0.8__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.2__ms_100__zg_0.8__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.2__ms_100__zg_0.8__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.2__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.2__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.2__mgp_300__mgn_30.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.5__mgp_300__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.5__mgp_30__mgn_300.pdf

100

images/PvalueHistogram__ss_1000__pr_0.5__zs_0.5__ms_100__zg_0.5__mgp_300__mgn_30.p

Unnamed: 0_level_0,sample.size.whole,snv.yes.rate,zero.snv.rate,snv.gene.mu,zero.gene.rate,gene.with.snv.mu,gene.without.snv.mu,pvalue.lm.rejection.rate,pvalue.zinb.rejection.rate
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,30,0.2,0.2,100,0.1,300,30,0.3,0.99
2,30,0.2,0.5,100,0.1,300,30,0.46,0.98
3,30,0.2,0.8,100,0.1,300,30,0.2040816,0.0
4,30,0.5,0.2,100,0.1,300,30,0.43,1.0
5,30,0.5,0.5,100,0.1,300,30,0.33,0.67
6,30,0.5,0.8,100,0.1,300,30,0.04,0.0
7,30,0.8,0.2,100,0.1,300,30,0.09,0.97
8,30,0.8,0.5,100,0.1,300,30,0.08,0.0
9,30,0.8,0.8,100,0.1,300,30,0.08,0.0
10,100,0.2,0.2,100,0.1,300,30,1.0,1.0


In [131]:
# Persist the scenario summary table to an .Rdata file in the working directory.
save(list = "result.df", file = "result_df.Rdata")

In [132]:
# Display the per-scenario rejection-rate table built by the simulation loop.
result.df

Unnamed: 0_level_0,sample.size.whole,snv.yes.rate,zero.snv.rate,snv.gene.mu,zero.gene.rate,gene.with.snv.mu,gene.without.snv.mu,pvalue.lm.rejection.rate,pvalue.zinb.rejection.rate
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,30,0.2,0.2,100,0.2,300,300,0.0000000,0.04000000
2,30,0.2,0.2,100,0.2,30,300,0.0800000,1.00000000
3,30,0.2,0.2,100,0.2,300,30,0.0600000,1.00000000
4,30,0.2,0.2,100,0.5,300,300,0.0000000,0.03000000
5,30,0.2,0.2,100,0.5,30,300,0.0000000,1.00000000
6,30,0.2,0.2,100,0.5,300,30,0.2400000,0.87000000
7,30,0.2,0.2,100,0.8,300,300,0.0000000,0.06250000
8,30,0.2,0.2,100,0.8,30,300,0.0000000,1.00000000
9,30,0.2,0.2,100,0.8,300,30,0.0000000,1.00000000
10,30,0.2,0.5,100,0.2,300,300,0.0100000,0.05050505
