In [None]:
suppressMessages({
    library(data.table)
    library(dplyr)
    library(tidyverse)
    library(ggplot2)
    library(ChIPseeker)
    library(TxDb.Hsapiens.UCSC.hg38.knownGene)
    library(GenomicRanges)
    library(rtracklayer)
    library(IRanges)
    library(org.Hs.eg.db)
    library(ChIPpeakAnno)
})



# Create the output directory (including any missing parents) and make it the
# working directory, so every relative path used below is resolved from there.
out_dir <- '../_m'
dir.create(path = out_dir, recursive = TRUE, showWarnings = TRUE)
setwd(dir = out_dir)

In [None]:
# Load the annotated peaks of all individual brains/samples.
peaks_tsv <- '../../_m/chipseeker_controlpeaks_stringent_annotation_homertss.tsv'
df <- fread(peaks_tsv)

In [None]:
# Number of peak rows per sample_id.
table(df[['sample_id']])

In [None]:
# List the overlap-peak TSV files and keep only the paths that mention
# 'Promoter' or 'Enhancer'.
overlap_tsvs <- Sys.glob('../../overlap_peaks/_m/*.tsv')
files_path <- grep('Promoter|Enhancer', overlap_tsvs, value = TRUE)
files_path

# SET A - active_enhancers, inactive_enhancers, active_promoters

In [None]:
# Read every annotated marker file, naming each table after its file name
# (without directory and extension) so bind_rows() can record it as sample_id.
annotation_tables <- lapply(files_path, fread)
names(annotation_tables) <- tools::file_path_sans_ext(basename(files_path))

# Stack the tables, strip the '_annotated' suffix from sample_id and build a
# 'chr:start-end' location id for every row.
df_annotation <- dplyr::bind_rows(annotation_tables, .id = 'sample_id')
df_annotation <- mutate(df_annotation,
                        sample_id = gsub('_annotated', '', sample_id),
                        loc_id = paste0(seqnames, ':', start, '-', end))

# Number the peaks within each sample_id to obtain a unique peak_id.
df_annotation <- df_annotation %>%
    group_by(sample_id) %>%
    mutate(peak_id = paste0(sample_id, '_', 1:n())) %>%
    ungroup()
df_annotation <- as.data.frame(df_annotation)

# peak_id doubles as the row name of every peak.
row.names(df_annotation) <- df_annotation$peak_id

table(df_annotation$sample_id)
#head(df_annotation)
#tail(df_annotation)

# Set A: one data frame per annotation class (which() drops NA comparisons,
# exactly like subset()).
active_enhancers <- df_annotation[which(df_annotation$sample_id == 'Active_Enhancers'), , drop = FALSE]
active_promoters <- df_annotation[which(df_annotation$sample_id == 'Active_Promoter'), , drop = FALSE]
inactive_enhancers <- df_annotation[which(df_annotation$sample_id == 'Inactive_Enhancers'), , drop = FALSE]

# set B - 6 brains cst/atlas, all cst, all atlas and all cst+atlas

In [None]:
# Set B - 6 brains cst/atlas

# Keep only the rows whose sample_id mentions SETD1A.
setd1a <- filter(df, grepl('SETD1A', sample_id))

# Lookup table: one row per distinct SETD1A sample_id plus a short label.
# NOTE(review): the labels are assigned by position, so they rely on the order
# in which the sample ids first appear in `df` -- confirm against the input file.
setd1a_ids <- unique(setd1a$sample_id)
setd1a_tags <- data.frame(sample_id = setd1a_ids) %>%
    mutate(old_peak_id = c('Brain3_cst','Brain3_atlas','Brain2_cst','Brain2_atlas','Brain1_cst','Brain1_atlas'))

#setd1a_tags

#setd1a_tags

In [None]:
# Per-brain CST peak set: rows whose sample_id mentions 'CST', labelled with
# the short brain tag from setd1a_tags and numbered within each tag.
all_cst_3brains <- df %>%
    filter(grepl('CST', sample_id)) %>%
    # explicit key: never join on any other column the two tables might share
    left_join(setd1a_tags, by = 'sample_id') %>%
    mutate(loc_id = paste0(seqnames, ':', start, '-', end)) %>%
    # keep the first row of every location, so a location present in several
    # samples stays only with the sample in which it appears first
    distinct(loc_id, .keep_all = TRUE) %>%
    group_by(old_peak_id) %>%
    mutate(peak_id = paste0(old_peak_id, '_', seq_len(n()))) %>%
    ungroup() %>%
    as.data.frame()

# peak_id doubles as the row name of every peak.
row.names(all_cst_3brains) <- all_cst_3brains$peak_id

table(all_cst_3brains$old_peak_id)
nrow(all_cst_3brains)

In [None]:
# Per-brain Atlas peak set: rows whose sample_id mentions 'Atlas', labelled
# with the short brain tag from setd1a_tags and numbered within each tag.
all_atlas_3brains <- df %>%
    filter(grepl('Atlas', sample_id)) %>%
    # explicit key: never join on any other column the two tables might share
    left_join(setd1a_tags, by = 'sample_id') %>%
    mutate(loc_id = paste0(seqnames, ':', start, '-', end)) %>%
    # keep the first row of every location, so a location present in several
    # samples stays only with the sample in which it appears first
    distinct(loc_id, .keep_all = TRUE) %>%
    group_by(old_peak_id) %>%
    mutate(peak_id = paste0(old_peak_id, '_', seq_len(n()))) %>%
    ungroup() %>%
    as.data.frame()

# peak_id doubles as the row name of every peak.
row.names(all_atlas_3brains) <- all_atlas_3brains$peak_id

table(all_atlas_3brains$old_peak_id)
nrow(all_atlas_3brains)

In [None]:
## all unique cst
# Pool every row whose sample_id mentions 'CST', keep one row per genomic
# location and number the peaks under the single label 'all_cst'.
all_cst <- df %>%
    filter(grepl('CST', sample_id)) %>%
    mutate(loc_id = paste0(seqnames, ':', start, '-', end),
           old_peak_id = 'all_cst') %>%
    distinct(loc_id, .keep_all = TRUE) %>% # get unique peaks only
    # seq_len() instead of 1:n(): an empty selection gives an empty table
    # rather than an error (1:0 is c(1, 0)); no ungroup() is needed because
    # the data is never grouped here
    mutate(peak_id = paste0(old_peak_id, '_', seq_len(n()))) %>%
    as.data.frame()

# peak_id doubles as the row name of every peak.
row.names(all_cst) <- all_cst$peak_id

dim(all_cst)

In [None]:
## all unique atlas
# Pool every row whose sample_id mentions 'Atlas', keep one row per genomic
# location and number the peaks under the single label 'all_atlas'.
all_atlas <- df %>%
    filter(grepl('Atlas', sample_id)) %>%
    mutate(loc_id = paste0(seqnames, ':', start, '-', end),
           old_peak_id = 'all_atlas') %>%
    distinct(loc_id, .keep_all = TRUE) %>% # get unique peaks only
    # seq_len() instead of 1:n(): an empty selection gives an empty table
    # rather than an error (1:0 is c(1, 0)); no ungroup() is needed because
    # the data is never grouped here
    mutate(peak_id = paste0(old_peak_id, '_', seq_len(n()))) %>%
    as.data.frame()

# peak_id doubles as the row name of every peak.
row.names(all_atlas) <- all_atlas$peak_id

dim(all_atlas)

In [None]:
## all unique cst+atlas (not liberal)
# Pool every row whose sample_id mentions 'SETD1A', keep one row per genomic
# location and number the peaks under the single label 'all_setd1a'.
all_setd1a <- df %>%
    filter(grepl('SETD1A', sample_id)) %>%
    mutate(loc_id = paste0(seqnames, ':', start, '-', end),
           old_peak_id = 'all_setd1a') %>%
    distinct(loc_id, .keep_all = TRUE) %>% # get unique peaks only
    # seq_len() instead of 1:n(): an empty selection gives an empty table
    # rather than an error (1:0 is c(1, 0)); no ungroup() is needed because
    # the data is never grouped here
    mutate(peak_id = paste0(old_peak_id, '_', seq_len(n()))) %>%
    as.data.frame()

# peak_id doubles as the row name of every peak.
row.names(all_setd1a) <- all_setd1a$peak_id

dim(all_setd1a)

In [None]:
# Let's check that our numbers match:


In [None]:
# Sanity check: the pooled sets and the per-brain sets should report the same
# number of peaks.
print(paste0('n of peaks from ', 'all_cst', ': ', nrow(all_cst)))
print(paste0('n of peaks from ', 'all_cst_3brains', ': ', nrow(all_cst_3brains)))

print('####')
print(paste0('n of peaks from ', 'all_atlas', ': ', nrow(all_atlas)))
# Report the per-brain table itself (previously all_atlas was counted twice).
print(paste0('n of peaks from ', 'all_atlas_3brains', ': ', nrow(all_atlas_3brains)))

# Total computed from the data instead of hard-coded counts.
print(paste0('total n of peaks: ', nrow(all_cst) + nrow(all_atlas)))

In [None]:
# Report the size of the pooled cst+atlas peak set.
n_all_setd1a <- nrow(all_setd1a)
print(paste0('n of peaks from ', 'all_setd1a', ': ', n_all_setd1a))

# Here, when using all setd1a peaks (everything from atlas+cst together), we get 6 fewer peaks, because any 'new' duplicated peaks shared between the cst and atlas brains had to be removed!

In [None]:
# OK, everything seems right! Let's calculate the overlaps.

# all_cst_3brains
# all_atlas_3brains

### findOverlapsOfPeaks(): 
### Set A (active_enhancers, active_promoter, inactive_enhancers)  
### x 
### Set B (all_cst_3brains)

In [None]:
# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_cst_3brains'    # name of the Set B data frame; also the output directory
annotation <- 'Active_Enhancers'    # Set A label used in file names and the summary
annotation_var <- active_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)



In [None]:
# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_cst_3brains'    # name of the Set B data frame; also the output directory
annotation <- 'Active_Promoter'     # Set A label used in file names and the summary
annotation_var <- active_promoters  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)



In [None]:
# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_cst_3brains'      # name of the Set B data frame; also the output directory
annotation <- 'Inactive_Enhancers'    # Set A label used in file names and the summary
annotation_var <- inactive_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)



### findOverlapsOfPeaks(): 
### Set A (active_enhancers, active_promoter, inactive_enhancers)  
### x 
### Set B (all_atlas_3brains)

In [None]:
# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_atlas_3brains'  # name of the Set B data frame; also the output directory
annotation <- 'Active_Enhancers'    # Set A label used in file names and the summary
annotation_var <- active_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)

In [None]:
#########

# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_atlas_3brains'  # name of the Set B data frame; also the output directory
annotation <- 'Active_Promoter'     # Set A label used in file names and the summary
annotation_var <- active_promoters  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)

In [None]:
##########

# Overlap Set A (annotation regions) with Set B (per-brain peaks), write the
# overlapping pairs to disk, then build and write a per-brain summary table.
analysis_id <- 'all_atlas_3brains'    # name of the Set B data frame; also the output directory
annotation <- 'Inactive_Enhancers'    # Set A label used in file names and the summary
annotation_var <- inactive_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])
dim(df_overlap)

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks, and Set B peaks per old_peak_id group.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))

names_setb <- unique(setb_peaks$old_peak_id)
# count() yields one row per group (sorted by SetB), for any number of groups.
setb_tag <- dplyr::count(setb_peaks, SetB = old_peak_id, name = 'SetB_universe')

# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)

## findOverlapsOfPeaks():
### Set A (active enhancers, active promoters or inactive enhancers)
##### x 
### Set B (all CST)

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_cst'            # name of the Set B data frame; also the output directory
annotation <- 'Active_Enhancers'    # Set A label used in file names and the summary
annotation_var <- active_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)


##############

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_cst'              # name of the Set B data frame; also the output directory
annotation <- 'Inactive_Enhancers'    # Set A label used in file names and the summary
annotation_var <- inactive_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)


##############

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_cst'            # name of the Set B data frame; also the output directory
annotation <- 'Active_Promoter'     # Set A label used in file names and the summary
annotation_var <- active_promoters  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)

## findOverlapsOfPeaks():
### Set A (active enhancers, active promoters or inactive enhancers)
##### x 
### Set B (all Atlas)

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_atlas'          # name of the Set B data frame; also the output directory
annotation <- 'Active_Enhancers'    # Set A label used in file names and the summary
annotation_var <- active_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)


##############

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_atlas'            # name of the Set B data frame; also the output directory
annotation <- 'Inactive_Enhancers'    # Set A label used in file names and the summary
annotation_var <- inactive_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)


##############

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_atlas'          # name of the Set B data frame; also the output directory
annotation <- 'Active_Promoter'     # Set A label used in file names and the summary
annotation_var <- active_promoters  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)

## findOverlapsOfPeaks():
### Set A (active enhancers, active promoters or inactive enhancers)
##### x 
### Set B (all_setd1a atlas+cst)

In [None]:
# Overlap Set A (annotation regions) with Set B (one pooled peak set), write
# the overlapping pairs to disk, then build and write a one-line summary.
analysis_id <- 'all_setd1a'         # name of the Set B data frame; also the output directory
annotation <- 'Active_Enhancers'    # Set A label used in file names and the summary
annotation_var <- active_enhancers  # Set A peaks
# Other cells may already have created this directory, so do not warn about it.
dir.create(analysis_id, showWarnings = FALSE)

# Resolve Set B once instead of calling get() at every use.
setb_peaks <- get(analysis_id)

ol <- findOverlapsOfPeaks(A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
                          B = makeGRangesFromDataFrame(setb_peaks, keep.extra.columns = TRUE),
                          ignore.strand = TRUE,
                          connectedPeaks = "merge")
#ol$overlappingPeaks

df_overlap <- as.data.frame(ol$overlappingPeaks[[1]])

# Suffix columns 24 .. (ncol - 2) with '_peaks2'.
# NOTE(review): 24 is presumably where the Set B columns begin, which depends on
# the number of Set A columns -- confirm if the annotation files change.
peaks2_cols <- 24:(ncol(df_overlap) - 2)
colnames(df_overlap)[peaks2_cols] <- paste0(colnames(df_overlap)[peaks2_cols], '_peaks2')

fwrite(df_overlap,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '.tsv'),
       quote = FALSE, sep = '\t', row.names = TRUE)

# Universe sizes: all Set A peaks and all Set B peaks.
seta_tag <- data.frame(SetA = annotation, SetA_universe = nrow(annotation_var))
setb_tag <- data.frame(SetB = analysis_id, SetB_universe = nrow(setb_peaks))

# The Set B label column was renamed just above, so it is addressed by its full
# name 'old_peak_id_peaks2' instead of relying on partial matching of `$`.
# dplyr::rename(new = old) is spelled out: a bare rename() resolves to whichever
# attached package masks it, and only worked here with reversed old = new arguments.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id_peaks2) %>%
    as.data.frame() %>%
    dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
    left_join(seta_tag, by = 'SetA') %>%
    left_join(setb_tag, by = 'SetB') %>%
    # overlap rows divided by the Set B universe
    # (alternative considered: Overlap / (SetA_universe + SetB_universe - Overlap))
    mutate(percentage_overlap = Overlap / SetB_universe) %>%
    relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)

df_overlap_summary

fwrite(df_overlap_summary,
       paste0(analysis_id, '/overlap_peakids_', annotation, '_', analysis_id, '_', 'summary', '.tsv'),
       quote = FALSE, sep = '\t', row.names = FALSE)


##############

In [None]:
# Overlap set A (the Inactive_Enhancers annotation) with set B (the peak table
# whose variable name is stored in `analysis_id`), then write the per-overlap
# table and a summary table into the `analysis_id` directory.
analysis_id <- "all_setd1a"
annotation <- "Inactive_Enhancers"
annotation_var <- inactive_enhancers
# The same output directory is created by several cells; do not warn when it
# already exists.
dir.create(analysis_id, showWarnings = FALSE)

# Strand is ignored; connectedPeaks = "merge" is passed through to
# ChIPpeakAnno::findOverlapsOfPeaks().
ol <- findOverlapsOfPeaks(
  A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
  B = makeGRangesFromDataFrame(get(analysis_id), keep.extra.columns = TRUE),
  ignore.strand = TRUE,
  connectedPeaks = "merge"
)
#ol$overlappingPeaks


# Take the first element of the overlappingPeaks list as a plain data.frame.
df_overlap <- ol$overlappingPeaks[[1]] %>% as.data.frame()


# Suffix the columns from position 24 up to the third-from-last with "_peaks2".
# NOTE(review): the hard-coded 24 presumably marks where set B's columns start,
# and the last two columns are left untouched -- confirm this still holds if
# the set A column layout changes.
colnames(df_overlap)[24:(ncol(df_overlap) - 2)] <- paste0(
  colnames(df_overlap)[24:(ncol(df_overlap) - 2)],
  "_peaks2"
)

fwrite(
  df_overlap,
  paste0(analysis_id, "/overlap_peakids_", annotation, "_", analysis_id, ".tsv"),
  quote = FALSE,
  sep = "\t",
  row.names = TRUE
)


# One-row lookup tables carrying the total number of rows ("universe") of each
# set, keyed by the set label so they can be joined onto the overlap counts.
seta_tag <- data.frame(
  SetA = paste0(annotation),
  SetA_universe = nrow(annotation_var)
)

setb_tag <- data.frame(
  SetB = paste0(analysis_id),
  SetB_universe = nrow(get(analysis_id))
)


# Cross-tabulate overlaps by sample_id x old_peak_id and attach the universes.
# NOTE(review): `$` on a data.frame allows partial name matching, so if
# `old_peak_id` was suffixed above this may resolve to `old_peak_id_peaks2`
# -- confirm which column is intended.
#
# rename() is namespace-qualified on purpose: S4Vectors (attached through the
# Bioconductor packages) also exports rename(), which takes old = new pairs,
# whereas dplyr::rename() takes new = old. The unqualified call only worked
# while the S4Vectors version masked dplyr's.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id) %>%
  as.data.frame() %>%
  dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
  left_join(seta_tag, by = "SetA") %>%
  left_join(setb_tag, by = "SetB") %>%
  # Alternative union-based denominator, kept for reference:
  # mutate(percentage_overlap = Overlap / (SetA_universe + SetB_universe - Overlap)) %>%
  # percentage_overlap is a fraction of set B's row count (not multiplied by 100).
  mutate(percentage_overlap = Overlap / SetB_universe) %>%
  relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)


df_overlap_summary


fwrite(
  df_overlap_summary,
  paste0(
    analysis_id, "/overlap_peakids_", annotation, "_", analysis_id,
    "_", "summary", ".tsv"
  ),
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)


##############

In [None]:
# Overlap set A (the Active_Promoter annotation) with set B (the peak table
# whose variable name is stored in `analysis_id`), then write the per-overlap
# table and a summary table into the `analysis_id` directory.
analysis_id <- "all_setd1a"
annotation <- "Active_Promoter"
annotation_var <- active_promoters
# The same output directory is created by several cells; do not warn when it
# already exists.
dir.create(analysis_id, showWarnings = FALSE)

# Strand is ignored; connectedPeaks = "merge" is passed through to
# ChIPpeakAnno::findOverlapsOfPeaks().
ol <- findOverlapsOfPeaks(
  A = makeGRangesFromDataFrame(annotation_var, keep.extra.columns = TRUE),
  B = makeGRangesFromDataFrame(get(analysis_id), keep.extra.columns = TRUE),
  ignore.strand = TRUE,
  connectedPeaks = "merge"
)
#ol$overlappingPeaks


# Take the first element of the overlappingPeaks list as a plain data.frame.
df_overlap <- ol$overlappingPeaks[[1]] %>% as.data.frame()


# Suffix the columns from position 24 up to the third-from-last with "_peaks2".
# NOTE(review): the hard-coded 24 presumably marks where set B's columns start,
# and the last two columns are left untouched -- confirm this still holds if
# the set A column layout changes.
colnames(df_overlap)[24:(ncol(df_overlap) - 2)] <- paste0(
  colnames(df_overlap)[24:(ncol(df_overlap) - 2)],
  "_peaks2"
)

fwrite(
  df_overlap,
  paste0(analysis_id, "/overlap_peakids_", annotation, "_", analysis_id, ".tsv"),
  quote = FALSE,
  sep = "\t",
  row.names = TRUE
)


# One-row lookup tables carrying the total number of rows ("universe") of each
# set, keyed by the set label so they can be joined onto the overlap counts.
seta_tag <- data.frame(
  SetA = paste0(annotation),
  SetA_universe = nrow(annotation_var)
)

setb_tag <- data.frame(
  SetB = paste0(analysis_id),
  SetB_universe = nrow(get(analysis_id))
)

# Cross-tabulate overlaps by sample_id x old_peak_id and attach the universes.
# NOTE(review): `$` on a data.frame allows partial name matching, so if
# `old_peak_id` was suffixed above this may resolve to `old_peak_id_peaks2`
# -- confirm which column is intended.
#
# rename() is namespace-qualified on purpose: S4Vectors (attached through the
# Bioconductor packages) also exports rename(), which takes old = new pairs,
# whereas dplyr::rename() takes new = old. The unqualified call only worked
# while the S4Vectors version masked dplyr's.
df_overlap_summary <- table(df_overlap$sample_id, df_overlap$old_peak_id) %>%
  as.data.frame() %>%
  dplyr::rename(SetA = Var1, SetB = Var2, Overlap = Freq) %>%
  left_join(seta_tag, by = "SetA") %>%
  left_join(setb_tag, by = "SetB") %>%
  # Alternative union-based denominator, kept for reference:
  # mutate(percentage_overlap = Overlap / (SetA_universe + SetB_universe - Overlap)) %>%
  # percentage_overlap is a fraction of set B's row count (not multiplied by 100).
  mutate(percentage_overlap = Overlap / SetB_universe) %>%
  relocate(SetA, SetB, SetA_universe, SetB_universe, Overlap, percentage_overlap)


df_overlap_summary


fwrite(
  df_overlap_summary,
  paste0(
    analysis_id, "/overlap_peakids_", annotation, "_", analysis_id,
    "_", "summary", ".tsv"
  ),
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)

In [None]:
# Print the R version and the attached/loaded package versions so the run can
# be reproduced.
sessionInfo()