In [None]:
library(dplyr)
library(tidyr)

In [None]:
# Load the HCT116 supercoiling signal from a headerless, 4-column bedGraph
# file and name the columns so they can be referenced in the filters below.
HCT_Ctrl <- read.table("HCT116_supercoiling_data.100k.bg", header = FALSE)
colnames(HCT_Ctrl) <- c("chrom", "start", "end", "value")

# Split bins into two domain classes by fixed signal thresholds.
# NOTE(review): bins with a HIGH value are labelled "neg" and bins with a LOW
# value "pos" -- presumably a higher signal marks negative supercoiling;
# confirm against the assay before touching the thresholds or the names.
HCT.neg <- HCT_Ctrl %>%
  filter(value > 0.15)
HCT.pos <- HCT_Ctrl %>%
  filter(value < -0.1)

# Keep only the interval coordinates for the files handed to bedtools.
# These two objects were previously referenced by write.table() without ever
# being created, which stops the script with "object not found".
HCT.neg.position <- HCT.neg[, c("chrom", "start", "end"), drop = FALSE]
HCT.pos.position <- HCT.pos[, c("chrom", "start", "end"), drop = FALSE]

# Plain tab-separated output: no header, no row names, no quoting.
write.table(HCT.neg.position, file = "HCT.neg.bdg", quote = FALSE,
            row.names = FALSE, col.names = FALSE, sep = "\t")
write.table(HCT.pos.position, file = "HCT.pos.bdg", quote = FALSE,
            row.names = FALSE, col.names = FALSE, sep = "\t")

# load() returns the name(s) of the restored object(s) and get() looks that
# name up, so this only works when the .RData file holds exactly one object.
genome.coord <- get(load("hg38.ensembl.genebody.protein_coding.RData"))
head(genome.coord)

# ---- External step: run the following in a shell (bedtools), not in R ----
# (Kept as comments; a triple-quoted block is not valid R syntax.)
#
# bedtools merge -i HCT.neg.bdg > HCT_neg.bdg
# bedtools merge -i HCT.pos.bdg > HCT_pos.bdg
#
# bedtools intersect -a HCT_neg.bdg -b genes.bdg -wb > negative_genes.bdg
# bedtools intersect -a HCT_pos.bdg -b genes.bdg -wb > positive_genes.bdg
#
# NOTE(review): bedtools merge expects position-sorted input; the rows above
# are written in the order of the source file -- confirm that file is sorted.
# NOTE(review): genes.bdg is not written anywhere in this part of the
# notebook; presumably it is exported from genome.coord -- confirm.

# Read the bedtools intersect results (headerless, so columns are auto-named
# V1, V2, ...). FALSE is spelled out because the F shorthand is an ordinary
# variable that can be reassigned.
negative_genes <- read.table("negative_genes.bdg", header = FALSE)
positive_genes <- read.table("positive_genes.bdg", header = FALSE)

# Column 8 is used as the gene name.
# NOTE(review): this assumes a 3-column A file followed by a gene record whose
# 5th field is the name -- confirm against the layout of genes.bdg.
# drop = FALSE keeps the result a one-column data frame (column V8) instead of
# collapsing it to a vector.
neg_name <- negative_genes[, 8, drop = FALSE]
pos_name <- positive_genes[, 8, drop = FALSE]


# Remove duplicate gene names (a gene can overlap several domain intervals),
# then report how many unique genes remain in each set.
negative_name <- neg_name %>%
  distinct(V8, .keep_all = TRUE)
nrow(negative_name)
positive_name <- pos_name %>%
  distinct(V8, .keep_all = TRUE)
nrow(positive_name)

In [None]:
# Table of active genes used to restrict the domain gene lists.
# Per the original note, this selection is genes with TPM > 0.2 and a gene
# length above 2 kb; that filtering is not performed here.
# NOTE(review): the file carries an .RData extension but is read with
# readRDS(), so it must have been written with saveRDS() -- confirm.
active_genes <- readRDS(
  file = "/data/yaoq2/TTseq/2nd/kallisto/select_genes.RData"
)

In [None]:
# Restrict each domain's unique gene list to genes present in the active-gene
# table (matched on active_genes$SYMBOL).
#
# The row mask is built from the same frame that is being filtered. Building it
# from negative_genes / positive_genes (the un-deduplicated intersect tables)
# gives a mask whose length and row order do not match negative_name /
# positive_name, so the wrong rows -- or NA rows -- would be selected.
negative_domain_activegenes <- negative_name %>%
  filter(V8 %in% as.character(active_genes$SYMBOL)) %>%
  distinct(V8, .keep_all = TRUE)
dim(negative_domain_activegenes)

positive_domain_activegenes <- positive_name %>%
  filter(V8 %in% as.character(active_genes$SYMBOL)) %>%
  distinct(V8, .keep_all = TRUE)
dim(positive_domain_activegenes)


# Wrap each gene name in literal double quotes, the form expected by ngsplot
# according to the original note.
negative_domain_activegenes$V8 <- paste0("\"", negative_domain_activegenes$V8, "\"")
positive_domain_activegenes$V8 <- paste0("\"", positive_domain_activegenes$V8, "\"")

# quote = FALSE so only the quotes added above appear in the output files.
write.table(negative_domain_activegenes, file = "neg_active_name.bdg",
            quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")
write.table(positive_domain_activegenes, file = "pos_active_name.bdg",
            quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")