**Process RNAseq quantifications from AOCS**

Transcript-level RNA-seq quantifications for AOCS (N=80) were aggregated to gene level, normalised with the variance-stabilising transformation (VST) and adjusted for tumour cellularity (purity is the covariate in the DESeq2 design). The resulting matrix contains variance-stabilised counts per gene per sample.

In [None]:
# Import the AOCS Salmon quantifications and summarise them to gene level.
#
# Reads (relative to the working directory set below):
#   AOCS/**/quant.sf           - Salmon quantification files
#   Paths_RNAexclusions.txt    - first column: quantification paths to drop
#   tx2gene.csv                - transcript -> gene table, no header row
# Produces: txi.salmon (tximport result), with one entry per retained file.
library(tximport)

# NOTE: every relative path in this notebook is resolved against this directory.
setwd("/Volumes/igmm/semple-lab/ianP/RNAseq_quantifications/")

# Every file under AOCS/ whose name matches "quant.sf", as a path starting
# with "AOCS/".
files_aocs <- dir("AOCS", pattern = "quant.sf", recursive = TRUE,
                  full.names = TRUE)

# Paths listed in the first column of the exclusions file are removed from the
# set of files to import.
rna_reps_to_exclude <- read.table("Paths_RNAexclusions.txt", sep = "\t")
rna_reps_to_exclude <- as.character(rna_reps_to_exclude[, 1])

new_files_orig <- setdiff(files_aocs, rna_reps_to_exclude)

# Fail early with a readable message (e.g. volume not mounted) instead of
# letting tximport fail on an empty input.
if (length(new_files_orig) == 0L) {
  stop("No quant.sf files left to import under 'AOCS' in ", getwd(),
       call. = FALSE)
}

tx2knownGene <- read.csv("tx2gene.csv", header = FALSE)

# Name each file by the second "/"- or "."-delimited field of its path; these
# names become the sample column names of the tximport matrices.
# Presumably this is the sample directory in AOCS/<sample>/quant.sf - confirm
# against the directory layout.
# Extracting per path avoids the recycling that cbind() does when paths split
# into different numbers of fields.
names(new_files_orig) <- vapply(
  strsplit(new_files_orig, split = "[/.]"),
  function(path_fields) path_fields[2],
  character(1)
)

txi.salmon <- tximport(new_files_orig, type = "salmon",
                       tx2gene = tx2knownGene)

In [None]:
# Build the per-sample design table (tumour purity), fit DESeq2 with purity as
# the design covariate, and compute the variance-stabilised expression values.
#
# Reads:    Tumour_cellularity_estimates.txt (columns used: Sample, Purity)
#           txi.salmon (created by the tximport cell)
# Produces: coldata (one row per RNA sample, in count-matrix column order),
#           dds_correct (fitted DESeqDataSet), vsd (VST-transformed object).
library(DESeq2)

sample <- read.table("Tumour_cellularity_estimates.txt", sep = "\t",
                     header = TRUE, stringsAsFactors = FALSE)
sample_expr <- sample[, c("Sample", "Purity")]

# RNA sample ids are the count-matrix column names; their first 8 characters
# are the key used to look samples up in the purity table.
rna_sampleids <- colnames(txi.salmon$counts)
rna_short_sampleids <- substr(rna_sampleids, 1, 8)

rna <- data.frame(RNA_sample = rna_sampleids, Sample = rna_short_sampleids,
                  stringsAsFactors = FALSE)

# Every quantified sample needs exactly one purity estimate. The count matrix
# is not subset here, so a missing or ambiguous estimate would otherwise only
# show up later as a length mismatch between colData and the counts.
purity_keys <- as.character(sample_expr$Sample)
purity_row <- match(rna$Sample, purity_keys)
if (anyNA(purity_row)) {
  stop("No purity estimate for RNA sample(s): ",
       paste(rna$RNA_sample[is.na(purity_row)], collapse = ", "),
       call. = FALSE)
}
ambiguous_keys <- intersect(rna$Sample, purity_keys[duplicated(purity_keys)])
if (length(ambiguous_keys) > 0L) {
  stop("More than one purity estimate for sample(s): ",
       paste(ambiguous_keys, collapse = ", "),
       call. = FALSE)
}

# One row per RNA sample, already in the column order of the count matrix.
coldata <- data.frame(Sample = rna$Sample,
                      RNA_sample = rna$RNA_sample,
                      Purity = sample_expr$Purity[purity_row],
                      stringsAsFactors = FALSE)
if (!is.numeric(coldata$Purity) || anyNA(coldata$Purity)) {
  stop("Purity must be numeric and non-missing for every RNA sample",
       call. = FALSE)
}

# Keep the row names equal to the count-matrix column names rather than a
# hard-coded 1:80: this works for any cohort size and keeps the colData and
# assay dimnames consistent.
rownames(coldata) <- coldata$RNA_sample

dds_correct <- DESeqDataSetFromTximport(txi.salmon,
                                        colData = coldata,
                                        design = ~Purity)

dds_correct <- DESeq(dds_correct)

# blind = FALSE: the transformation uses the dispersions estimated under the
# ~Purity design.
# NOTE(review): vst() does not itself regress Purity out of the transformed
# values - confirm this is what "adjusted for tumour cellularity" is meant to
# describe.
vsd <- vst(dds_correct, blind = FALSE)

In [None]:
# Pull the transformed expression matrix out of the VST object, one column
# per sample.
all_vsd <- assay(vsd)

# Label the columns with the ids held in coldata$Sample (coldata rows are in
# the same order as the matrix columns).
colnames(all_vsd) <- coldata$Sample

# Tab-separated, unquoted text file, written relative to the working directory.
write.table(
  all_vsd,
  file = "AOCS/AOCS_TPM_VST.txt",
  sep = "\t",
  quote = FALSE
)