In [None]:
%load_ext rpy2.ipython

In [None]:
%%bash
# Run the simulation script once for every JSON configuration in ./Exp_conf_test.
# Simulation output is produced inside ./Simulation_result (the script is run with
# that directory as its working directory); ./Experiment_out is created for the
# analysis cells further down.
#
# The paths are resolved to absolute ones *before* changing directory so they stay
# valid after the `cd`. They remain exported, as before, so the child Python
# process can still see them in its environment.
export WD=./Simulation_result
export CONF="$(pwd)/Exp_conf_test"
export SCRIPT="$(pwd)/../Simulation_Script/simulation.py"

# -p makes the cell re-runnable: existing directories are not an error.
mkdir -p ./Experiment_out "$WD"

# Never run the simulations in the wrong directory if the cd fails.
cd "$WD" || exit 1

status=0
found=0
for CONFIG in "$CONF"/*.json
do
    # When the glob matches nothing bash leaves the pattern untouched; skip it.
    [ -e "$CONFIG" ] || continue
    found=1
    # Keep going after a failed run, but remember that something went wrong.
    python3 "$SCRIPT" "$CONFIG" || status=$?
done

if [ "$found" -eq 0 ]; then
    echo "No *.json configuration files found in $CONF" >&2
    exit 1
fi

# Surface a failing simulation to the notebook instead of only the last exit code.
exit "$status"

In [None]:
%%R
# Index the simulation output tree into `datadf`.
#
# The tree under ./Simulation_result is walked four directory levels deep
# (group / experiment / pattern / peak). For every peak directory the first file
# returned by list.files() is recorded, giving one row per peak with the columns
# Group, Experiment, Pattern, Peak and Path.

sim_subdirs <- function(path) {
    # Immediate sub-directories of `path`, as full paths.
    list.dirs(path = path, full.names = TRUE, recursive = FALSE)
}

groups <- sim_subdirs("./Simulation_result")

x <- c("Group", "Experiment", "Pattern", "Peak", "Path")

# Rows are collected in a list and bound once at the end; calling rbind() inside
# the loop re-copies the whole matrix on every iteration.
rows <- list()
for (group in groups) {
    group_name <- basename(group)
    for (experiment in sim_subdirs(group)) {
        exp_name <- basename(experiment)
        for (pattern in sim_subdirs(experiment)) {
            pattern_name <- basename(pattern)
            for (peak in sim_subdirs(pattern)) {
                peak_name <- basename(peak)
                peak_files <- list.files(path = peak, full.names = TRUE, recursive = FALSE)
                if (length(peak_files) == 0) {
                    # Previously this produced an NA path that only failed much
                    # later as "cannot open file 'NA'"; fail here with the culprit.
                    stop("Peak directory contains no files: ", peak)
                }
                peak_file <- peak_files[1]
                rows[[length(rows) + 1]] <- c(group_name, exp_name, pattern_name, peak_name, peak_file)
            }
        }
    }
}

if (length(rows) > 0) {
    datalist <- do.call(rbind, rows)
} else {
    # Keep the five-column shape even when nothing was found, so assigning the
    # column names below (and the downstream cells) do not error.
    warning("No peak directories found under ./Simulation_result")
    datalist <- matrix(character(0), nrow = 0, ncol = length(x))
}

# stringsAsFactors = FALSE keeps the columns as plain character vectors on every
# R version (R < 4.0 converted them to factors by default).
datadf <- data.frame(datalist, stringsAsFactors = FALSE)
colnames(datadf) <- x

In [None]:
%%R
library(MMDiff3)
library(reshape2)
library(ggplot2)
library(GenomicRanges)
library(DiffBind)
library(devtools)

In [None]:
%%R
# Compute MMD distance tables for every group / experiment / pattern in `datadf`.
#
# For each pattern the per-peak read tables (CSV files listed in datadf$Path, with
# at least the columns `modification` and `position`) are loaded and split by
# modification id into `n_mods` samples named mod_0 ... mod_<n_mods - 1>. Read
# positions and per-region read counts are injected into a DBAmmd object, both the
# 'MMD2' and 'MMD' distances are computed, and the results are written to
# ./Experiment_out/<group>/<experiment>/<pattern>/ as
#   MMD_dists.csv      (dist.method = 'MMD2')
#   MMD_old_dists.csv  (dist.method = 'MMD')

# ---- analysis constants ------------------------------------------------------
n_mods <- 10             # number of modification ids; ids run from 0 to n_mods - 1
region_end <- 3300       # every peak region spans 0..region_end
peak_boundary <- 200     # stored as AnaData$PeakBoundary and added to read positions
                         # NOTE(review): the original used the literal 200 in both
                         # places; assumed to be the same quantity -- confirm.

mod_ids <- seq_len(n_mods) - 1L
modnames <- paste0('mod_', mod_ids)

# One sample per modification id. The sheet is identical for every group, so it is
# built once here and only written inside the loop.
samplesheet <- data.frame(SampleID = modnames,
                          Tissue = modnames,
                          Factor = rep(1, n_mods),
                          Condition = rep('Ctr', n_mods),
                          Replicate = rep(1, n_mods),
                          PeakCaller = rep('macs', n_mods))

experiment_groups <- as.list(unique(datadf[['Group']]))

for (exp_group in experiment_groups) {

    group_df <- subset(datadf, Group == exp_group)
    group_dir <- paste0('./Experiment_out', '/', exp_group)
    # recursive + showWarnings = FALSE: works on a fresh checkout and on re-runs.
    dir.create(group_dir, recursive = TRUE, showWarnings = FALSE)

    sheet_path <- paste0(group_dir, '/', 'SampleSheet.csv')
    write.csv(samplesheet, sheet_path, row.names = FALSE)

    replicas <- as.list(unique(group_df[['Experiment']]))

    for (replica in replicas) {
        replica_df <- subset(group_df, Experiment == replica)
        peaks <- as.list(unique(replica_df[['Peak']]))
        patterns <- as.list(unique(replica_df[['Pattern']]))

        # Peaks are numbered 0, 1, ... in order of first appearance; each becomes
        # its own pseudo-chromosome. These depend only on the replica, so they are
        # built once instead of once per pattern.
        peak_idx <- seq_along(peaks) - 1L
        rnames <- paste0('chr', peak_idx, ':0-', region_end)
        cnames <- modnames

        # NOTE(review): the regions' chr values are the bare indices 0, 1, ...
        # while the read / count rows are labelled "chr0:0-3300", ... -- confirm
        # that MMDiff3 matches them by position rather than by name.
        peak_df <- data.frame(chr = peak_idx,
                              start = rep(0, length(peaks)),
                              end = rep(region_end, length(peaks)))
        regions <- makeGRangesFromDataFrame(peak_df)

        for (pattern in patterns) {
            # A fresh DBAmmd object per pattern: it is filled in place below.
            ExperimentData <- list(genome = 'none',
                                   dataDir = '.',
                                   sampleSheet = sheet_path)
            MetaData <- list('ExpData' = ExperimentData)
            MMD <- DBAmmd(MetaData)
            MMD <- setRegions(MMD, regions)

            Meta <- metaData(MMD)
            Meta$AnaData$pairedEnd <- rep(FALSE, n_mods)
            Meta$AnaData$PeakBoundary <- peak_boundary
            MMD@MetaData <- Meta

            pattern_df <- subset(replica_df, Pattern == pattern)
            dirname <- paste0(group_dir, "/", replica, "/", pattern)
            dir.create(dirname, recursive = TRUE, showWarnings = FALSE)

            # One read table per peak, keyed by peak name.
            read_data <- by(data = pattern_df$Path,
                            INDICES = pattern_df$Peak,
                            FUN = function(x) read.csv(file = paste(x), header = TRUE, sep = ","))

            readList <- list()
            rawCounts <- matrix(NA_integer_, nrow = length(peaks), ncol = n_mods,
                                dimnames = list(rnames, cnames))

            for (i in mod_ids) {
                col <- modnames[i + 1L]
                tempList <- list()
                for (j in seq_along(peaks)) {
                    peak <- peaks[[j]]
                    row <- rnames[j]
                    peak_reads <- read_data[[peak]]
                    if (is.null(peak_reads)) {
                        # The peak exists elsewhere in this experiment but not
                        # for this pattern.
                        stop("No read table for peak '", peak, "' in ", dirname)
                    }
                    df <- subset(peak_reads, modification == i)
                    # Shift positions by the peak boundary, plus 1.
                    tempList[[row]] <- (as.numeric(df$position) + peak_boundary + 1)
                    rawCounts[row, col] <- nrow(df)
                }
                readList[[col]] <- tempList
            }

            MMD@Reads <- list('Center' = readList)
            MMD@RawTotalCounts <- rawCounts
            MMD <- compDists(MMD, dist.method = 'MMD2', background_intensity = 0.35)
            MMD <- compDists(MMD, dist.method = 'MMD')
            dists <- MMD@DISTs$MMD
            dists2 <- MMD@DISTs$MMD2
            write.csv(dists, file = paste0(dirname, "/MMD_old_dists.csv"))
            write.csv(dists2, file = paste0(dirname, "/MMD_dists.csv"))
        }
    }
}