In [20]:
library(stringi)

In [21]:
work_dir <- "ciriquant/work"
circ_dir <- "ciriquant/circs"
stringtie_dir <- "ciriquant/transcripts"

In [22]:
create_df <- function(samples, condition) {
  ciri_files <- file.path(circ_dir, paste0(samples, ".gtf"))
  stringtie_files <- file.path(stringtie_dir, paste0(samples, "_out.gtf"))
  conditions <- rep(condition, length(samples))
  return(data.frame("ciri" = ciri_files, "condition" = conditions, "stringtie" = stringtie_files, row.names = samples))
}

In [23]:
ciri_prepde <- function(df, wd) {
  samplesheet <- file.path(wd, "samplesheet_ciri.tsv")
  write.table(df, samplesheet, sep = "\t", quote = FALSE, row.names = TRUE, col.names = FALSE)

  lib_file <- file.path(wd, "lib_ciri.csv")
  circ_file <- file.path(wd, "circ_ciri.csv")
  bsj_file <- file.path(wd, "bsj_ciri.csv")
  ratio_file <- file.path(wd, "ratio_ciri.csv")

  system(paste("prep_CIRIquant -i", samplesheet, "--lib", lib_file, "--circ", circ_file, "--bsj", bsj_file, "--ratio", ratio_file))

  return(list(lib_file, circ_file, bsj_file, ratio_file))
}

In [24]:
stringtie_prepde <- function(df, wd) {
  samplesheet <- file.path(wd, "samplesheet_stringtie.tsv")
  write.table(df, samplesheet, sep = "\t", quote = FALSE, row.names = TRUE, col.names = FALSE)

  gene_file <- file.path(wd, "gene.csv")
  transcript_file <- file.path(wd, "transcript.csv")

  system(paste("prepDE.py -i", samplesheet, "-g", gene_file, "-t", transcript_file))

  return(list(gene_file, transcript_file))
}

ciriquant_de <- function(lib_file, bsj_file, gene_file, wd) {
  gene_results <- file.path(wd, "gene_results.csv")
  circ_results <- file.path(wd, "circ_results.csv")

  system(paste("CIRI_DE_replicate --lib", lib_file, "--bsj", bsj_file, "--gene", gene_file, "--out", circ_results, "--out2", gene_results))

  return(list(gene_results, circ_results))
}

In [44]:
run <- function(control, treatment) {
  control_df <- create_df(control, "C")
  treatment_df <- create_df(treatment, "T")
  df <- rbind(control_df, treatment_df)

  tryCatch({
    temp_dir <- file.path(work_dir, stri_rand_strings(1, 10))
    dir.create(temp_dir)

    p_ciri <- ciri_prepde(df[c("ciri", "condition")], temp_dir)
    p_stringtie <- stringtie_prepde(df[c("stringtie")], temp_dir)

    p_ciri_de <- ciriquant_de(p_ciri[[1]], p_ciri[[3]], p_stringtie[[1]], temp_dir)

    df_gene <- read.csv(p_ciri_de[[1]], row.names = 1)
    df_circ <- read.csv(p_ciri_de[[2]], row.names = 1)

    return(list(df_gene, df_circ))
  },
  finally = {
    # unlink(temp_dir, recursive = TRUE)
  })
}

In [26]:
phenotype <- read.csv("dea/phenotype.csv", header=TRUE, row.names = 1)
phenotype$transgene <- as.factor(phenotype$transgene)
phenotype$drug <- as.factor(phenotype$drug)
head(phenotype)

Unnamed: 0_level_0,condition,age,transgene,induction,drug,replicate
Unnamed: 0_level_1,<chr>,<int>,<fct>,<int>,<fct>,<int>
aging_12m_ESR1_no_1,12_ESR1_0_no,12,ESR1,0,no,1
aging_12m_ESR1_no_2,12_ESR1_0_no,12,ESR1,0,no,2
aging_12m_ESR1_no_3,12_ESR1_0_no,12,ESR1,0,no,3
aging_12m_ESR1_1w_1,12_ESR1_1_no,12,ESR1,1,no,1
aging_12m_ESR1_1w_2,12_ESR1_1_no,12,ESR1,1,no,2
aging_12m_ESR1_1w_3,12_ESR1_1_no,12,ESR1,1,no,3


In [34]:
circ_expression <- read.table("quantification/ciriquant.tsv", sep="\t", header=TRUE, row.names=1)
genes <- circ_expression$gene_id
names(genes) <- rownames(circ_expression)
circ_expression <- circ_expression[,rownames(phenotype)]
head(circ_expression)

Unnamed: 0_level_0,aging_12m_ESR1_no_1,aging_12m_ESR1_no_2,aging_12m_ESR1_no_3,aging_12m_ESR1_1w_1,aging_12m_ESR1_1w_2,aging_12m_ESR1_1w_3,aging_18m_ESR1_no_1,aging_18m_ESR1_no_2,aging_18m_ESR1_no_3,aging_18m_ESR1_1w_1,⋯,antiHormonal_20m_ESR1_letrozole_2m_3,antiHormonal_20m_ESR1_tamoxifen_2m_1,antiHormonal_20m_ESR1_tamoxifen_2m_2,antiHormonal_20m_ESR1_tamoxifen_2m_3,antiHormonal_20m_ESR1_no_1,antiHormonal_20m_ESR1_no_2,antiHormonal_20m_ESR1_no_3,antiHormonal_18m_ESR1_no_1,antiHormonal_18m_ESR1_no_2,antiHormonal_18m_ESR1_no_3
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr10:100324141-100332733,0.0,0,0,0,0,0,0.0,0.0,0,0.0307,⋯,0,0,0,0,0,0,0,0,0.0,0
chr10:100378279-100397266,0.0,0,0,0,0,0,0.0562,0.0,0,0.0,⋯,0,0,0,0,0,0,0,0,0.0,0
chr10:100387226-100398093,0.0,0,0,0,0,0,0.0281,0.0,0,0.0,⋯,0,0,0,0,0,0,0,0,0.0,0
chr10:102972968-102982154,0.0377,0,0,0,0,0,0.0,0.0368,0,0.0,⋯,0,0,0,0,0,0,0,0,0.0552,0
chr10:105079450-105159498,0.0,0,0,0,0,0,0.0,0.0,0,0.0,⋯,0,0,0,0,0,0,0,0,0.0,0
chr10:105079450-105249648,0.0,0,0,0,0,0,0.0,0.0,0,0.0,⋯,0,0,0,0,0,0,0,0,0.0,0


In [28]:
df_genes <- read.table("dea/gene_tpm.tsv", sep="\t", header=TRUE, row.names=1)
df_genes$gene_name <- NULL
# Order genes columns like phenotype rows
df_genes <- df_genes[,rownames(phenotype)]
phenotype$esr1 <- as.numeric(df_genes["Esr1",])
head(df_genes)

Unnamed: 0_level_0,aging_12m_ESR1_no_1,aging_12m_ESR1_no_2,aging_12m_ESR1_no_3,aging_12m_ESR1_1w_1,aging_12m_ESR1_1w_2,aging_12m_ESR1_1w_3,aging_18m_ESR1_no_1,aging_18m_ESR1_no_2,aging_18m_ESR1_no_3,aging_18m_ESR1_1w_1,⋯,antiHormonal_20m_ESR1_letrozole_2m_3,antiHormonal_20m_ESR1_tamoxifen_2m_1,antiHormonal_20m_ESR1_tamoxifen_2m_2,antiHormonal_20m_ESR1_tamoxifen_2m_3,antiHormonal_20m_ESR1_no_1,antiHormonal_20m_ESR1_no_2,antiHormonal_20m_ESR1_no_3,antiHormonal_18m_ESR1_no_1,antiHormonal_18m_ESR1_no_2,antiHormonal_18m_ESR1_no_3
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
0610005C13Rik,0.049659,0.022042,0.01438,0.050797,0.01698,0.035141,0.057923,0.0,0.0,0.0,⋯,0.122406,0.016963,0.0,0.0,0.022942,0.027626,0.0,0.052219,0.039258,0.0
0610006L08Rik,0.0,0.0,0.0,0.024856,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009605,0.0
0610009E02Rik,0.38168,0.687776,0.431627,0.29598,0.420362,0.278761,0.22629,0.336235,0.371526,0.516198,⋯,0.290387,0.265336,0.356291,0.525629,0.311458,0.496499,0.25348,0.524102,0.250238,0.40779
0610009L18Rik,1.968853,1.827257,2.10686,1.617287,1.377004,2.248239,2.442666,1.502402,1.702409,1.624864,⋯,2.75745,2.598394,2.159407,2.334797,2.232605,1.79231,1.588909,2.038981,1.627236,1.200321
0610010K14Rik,5.180386,6.145867,4.719103,6.447013,5.178654,6.821811,4.078035,5.470046,5.439292,5.037731,⋯,4.736633,5.265562,4.903018,4.529731,5.492201,6.087096,7.292915,7.0018,5.633049,4.343781
0610025J13Rik,0.028554,0.019011,0.048791,0.015226,0.015269,0.023701,0.0,0.007208,0.006963,0.0,⋯,0.027519,0.007627,0.036733,0.027936,0.030946,0.0,0.013214,0.015013,0.0,0.011979


In [37]:
correlations <- t(apply(circ_expression, 1, function(row) {
    res <- cor.test(as.numeric(row), phenotype$esr1)
    return(c(res$estimate, res$p.value))
}))
correlations <- as.data.frame(correlations)
colnames(correlations) <- c("correlation", "p.value")
correlations$p.adj <- p.adjust(correlations$p.value, method = "BH")
correlations <- correlations[order(correlations$p.adj),]
correlations <- cbind(correlations, genes[rownames(correlations)])
head(correlations, 20)

Unnamed: 0_level_0,correlation,p.value,p.adj,genes[rownames(correlations)]
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<chr>
chr10:106798823-106840676,0.3765251,0.0011146508,0.2346631,Acss3
chr10:128354797-128358984,0.3498314,0.0025930589,0.2346631,Esyt1
chr10:17783782-17791780,0.4048691,0.0004192854,0.2346631,Heca
chr10:25143206-25165628,0.4165935,0.0002726275,0.2346631,Akap7
chr10:63124831-63135018,0.3498314,0.0025930589,0.2346631,Herc4
chr10:69794748-69816224,0.3671009,0.0015140893,0.2346631,Ank3
chr10:76272260-76273050,0.3505902,0.0025340405,0.2346631,Pcnt
chr10:79862371-79862761,0.3498314,0.0025930589,0.2346631,Arhgap45
chr10:90895435-90897691,0.3498314,0.0025930589,0.2346631,Apaf1
chr10:90895910-90915950,0.3498314,0.0025930589,0.2346631,Apaf1


# Aging

In [41]:
head(phenotype)

Unnamed: 0_level_0,condition,age,transgene,induction,drug,replicate,esr1
Unnamed: 0_level_1,<chr>,<int>,<fct>,<int>,<fct>,<int>,<dbl>
aging_12m_ESR1_no_1,12_ESR1_0_no,12,ESR1,0,no,1,2.903286
aging_12m_ESR1_no_2,12_ESR1_0_no,12,ESR1,0,no,2,5.555891
aging_12m_ESR1_no_3,12_ESR1_0_no,12,ESR1,0,no,3,4.875684
aging_12m_ESR1_1w_1,12_ESR1_1_no,12,ESR1,1,no,1,4.954692
aging_12m_ESR1_1w_2,12_ESR1_1_no,12,ESR1,1,no,2,3.282341
aging_12m_ESR1_1w_3,12_ESR1_1_no,12,ESR1,1,no,3,5.727712


In [45]:
young <- phenotype[phenotype$age < 18,]
old <- phenotype[phenotype$age >= 18,]

run(rownames(young), rownames(old))

“error in running command”
“cannot open file 'ciriquant/work/Z0ioZKNAjd/gene_results.csv': No such file or directory”


ERROR: Error in file(file, "rt"): cannot open the connection
