# DESeq2 Analysis of Neurons and NPCs for SETD1A paper revisions

# Install and Load Packages

In [None]:
suppressMessages({
    library(tidyverse)
    library(BiocManager)
    library(DESeq2)
    library(GenomicFeatures)
    library(SummarizedExperiment)
    library(openxlsx)
    library(EnhancedVolcano)
    library(data.table)
    })

# Import and Format Data Structures

## Metadata

In [None]:
# Load the sample sheet; the first worksheet row supplies the column names.
metaData <- read.xlsx(
    '/ceph/projects/tomoyo_SETD1A_bulk_RNA-seq/Meta_sample_info/120522_Sample_metadata.xlsx',
    startRow = 1,
    colNames = TRUE
)

head(metaData)

dim(metaData)

In [None]:
# Replace dots and hyphens in the column names with underscores.
head(names(metaData))
names(metaData) <- gsub("-", "_", gsub("\\.", "_", names(metaData)))
head(names(metaData))

In [None]:
# Group sizes for the two comparison sets.
table(metaData[["DEG_Set_1"]])
table(metaData[["DEG_Set_2"]])

In [None]:
# Samples belonging to comparison set 1 (used below as the NPC samples).
npcMetaData <- metaData %>% filter(DEG_Set_1 %in% c("Hetero", "WT"))
head(npcMetaData)
dim(npcMetaData)

In [None]:
npcMetaData$DEG_Set_1


In [None]:
# Row names are the sample IDs with any '#' removed.
rownames(npcMetaData) <- gsub("\\#", "", npcMetaData$New_Sample_ID)
head(npcMetaData)

In [None]:
# Samples belonging to comparison set 2 (used below as the neuron samples).
neuroMetaData <- metaData %>% filter(DEG_Set_2 %in% c("Hetero", "WT"))
head(neuroMetaData)
dim(neuroMetaData)

In [None]:
rownames(neuroMetaData) <- gsub("\\#", "", neuroMetaData$New_Sample_ID)
head(neuroMetaData)

In [None]:
neuroMetaData$DEG_Set_2

## Counts Data

In [None]:
# Read the gene-level count tables from the two sequencing runs.
# read_tsv() returns a tibble, so each table is converted to a plain data.frame.
geneCounts <- as.data.frame(read_tsv("/ceph/projects/tomoyo_SETD1A_bulk_RNA-seq/fastq/hisat2/featurecounts_genes_M/count_table/_m/counts.tsv", col_names = TRUE))
geneCounts2 <- as.data.frame(read_tsv("/ceph/projects/tomoyo_SETD1A_bulk_RNA-seq_jun_2022/hisat2/featurecounts_genes_M/count_table/_m/counts.tsv", col_names = TRUE))
head(geneCounts) 
dim(geneCounts)
head(geneCounts2)
dim(geneCounts2)


In [None]:
# Spot-check one row across the first 15 columns.
geneCounts[12, 1:15]

In [None]:
# Only the June 2022 sequencing of LIBD2c1_CM130_51_NPC should be used (to be
# confirmed), so the column from the first run is dropped here to avoid a
# duplicated sample.
geneCounts <- dplyr::select(geneCounts,-LIBD2c1_CM130_51_NPC)
head(geneCounts)
dim(geneCounts)

In [None]:
# Combine both sequencing runs, keeping only genes present in both tables.
# The join key is named explicitly: without `by`, inner_join() joins on every
# column the two tables share, so a sample column present in both runs would
# silently become part of the key. With `by = "Geneid"` such a column shows up
# as `.x`/`.y` duplicates instead, and the sample look-ups further down fail
# loudly.
totalGeneCounts <- inner_join(geneCounts, geneCounts2, by = "Geneid")
dim(totalGeneCounts)
head(totalGeneCounts)

In [None]:
# The two numbers match when no column name is duplicated after the join.
ncol(totalGeneCounts)
length(unique(names(totalGeneCounts)))

In [None]:
# Use the gene IDs as row names, then drop the ID column (the first column).
rownames(totalGeneCounts) <- totalGeneCounts[["Geneid"]]
totalGeneCounts <- totalGeneCounts[, -1]
head(totalGeneCounts)
dim(totalGeneCounts)

In [None]:
# Is every NPC sample ID present among the count columns?
is.element(row.names(npcMetaData), colnames(totalGeneCounts))

In [None]:
# Is every neuron sample ID present among the count columns?
is.element(row.names(neuroMetaData), colnames(totalGeneCounts))

In [None]:
# Count columns for the NPC samples, in metadata row order.
npcgeneCounts <- totalGeneCounts[, row.names(npcMetaData), drop = FALSE]
head(npcgeneCounts)

In [None]:
# Count columns for the neuron samples, in metadata row order.
neurogeneCounts <- totalGeneCounts[, row.names(neuroMetaData), drop = FALSE]
head(neurogeneCounts)
dim(neurogeneCounts)

In [None]:
row.names(npcMetaData)

In [None]:
row.names(neuroMetaData)

## Make se objects

In [None]:
# Wrap each count matrix and its sample metadata in a SummarizedExperiment.
# The count columns were selected by the metadata row names above, so assay
# columns and colData rows are in the same order.
seNPC <- SummarizedExperiment(assays = list(counts=as.matrix(npcgeneCounts)), colData = npcMetaData)
seNeuro <- SummarizedExperiment(assays = list(counts=as.matrix(neurogeneCounts)), colData = neuroMetaData)


In [None]:
seNPC

In [None]:
seNeuro

## Create DDS objects

In [None]:
# Design: iPSC line and sequencing run as covariates, genotype column last.
# The shrinkage step further down selects the last entry of resultsNames().
ddsNPC <- DESeqDataSet(seNPC, ~iPSC_line + Seq_Run + DEG_Set_1)
ddsNPC

In [None]:
npcMetaData

In [None]:
# Same design for the neuron samples, using their genotype column.
ddsNeuro <- DESeqDataSet(seNeuro, ~iPSC_line + Seq_Run + DEG_Set_2)
ddsNeuro

# Filter and Prepare for Analysis

## Relevel DDS objects

In [None]:
# Make "WT" the reference level of the genotype factor; levels are shown
# before and after as a check.
levels(ddsNPC$DEG_Set_1)
ddsNPC$DEG_Set_1 <- relevel(ddsNPC$DEG_Set_1, "WT")
levels(ddsNPC$DEG_Set_1)

In [None]:
levels(ddsNeuro$DEG_Set_2)
ddsNeuro$DEG_Set_2 <- relevel(ddsNeuro$DEG_Set_2, "WT")
levels(ddsNeuro$DEG_Set_2)

## Remove Genes Below Expression Threshold and Estimate Size Factors

In [None]:
#Create vectors of minimally expressed genes
# TRUE for genes whose counts summed over all samples are at least 5.
thresholdGenesNPC <- rowSums(counts(ddsNPC)) >= 5
thresholdGenesNeuro <- rowSums(counts(ddsNeuro)) >= 5

In [None]:
#Estimate Size Factors
# Note: this runs before the low-count genes are removed in the next cell.
ddsNPC <- estimateSizeFactors(ddsNPC)
ddsNeuro <- estimateSizeFactors(ddsNeuro)

In [None]:
#Subset by minimally expressed gene vectors
ddsNPC <- ddsNPC[thresholdGenesNPC,]
ddsNeuro <- ddsNeuro[thresholdGenesNeuro,]

In [None]:
dim(ddsNPC)

In [None]:
 dim(ddsNeuro)

In [None]:
# Confirm the design formulas before fitting.
design(ddsNPC)
design(ddsNeuro)


# Run DESeq Analysis

In [None]:
# Fit the DESeq2 model for each cell type.
ddsNPC <- DESeq(ddsNPC)

In [None]:
ddsNeuro <- DESeq(ddsNeuro)

## Write out counts data table

In [None]:
# Gene annotation from the previous analysis; only the gene ID and gene name
# columns are kept.
namesTable <- dplyr::select(
    read_tsv(file = "/ceph/projects/tomoyo_SETD1A_bulk_RNA-seq/analysis/annotation/_m/annotation_gene-unique-info.tsv", col_names = TRUE),
    c("gene_id", "gene_name")
)
head(namesTable)
dim(namesTable)


In [None]:
# Pull the assay values of each filtered dataset into a data.frame.
df_NPC <- ddsNPC %>% assay() %>% as.data.frame()
head(df_NPC)

df_Neuro <- ddsNeuro %>% assay() %>% as.data.frame()
head(df_Neuro)

In [None]:
# Copy the row names (gene IDs) into a column so they can serve as a join key.
df_NPC[["gene_id"]] <- rownames(df_NPC)

head(df_NPC)

df_Neuro[["gene_id"]] <- rownames(df_Neuro)

head(df_Neuro)

In [None]:
# Attach gene names; genes without an annotation entry get NA.
df_NPC <- df_NPC %>% left_join(namesTable, by = 'gene_id')
head(df_NPC)

df_Neuro <- df_Neuro %>% left_join(namesTable, by = 'gene_id')
head(df_Neuro)

In [None]:
# Directory for the exported count tables.
output <- "../_m/"

# NPC count table (sample columns followed by gene_id and gene_name).
fwrite(df_NPC,
       file = paste0(output, 'NPCgeneCounts.tsv'),
       quote = FALSE, sep = '\t',
       row.names = FALSE)


# Neuron count table. This call previously wrote df_NPC again, so
# NeurogeneCounts.tsv held the NPC counts.
fwrite(df_Neuro,
       file = paste0(output, 'NeurogeneCounts.tsv'),
       quote = FALSE, sep = '\t',
       row.names = FALSE)


In [None]:
## log2(count + 1) transformation of the count tables

In [None]:
# The last two columns of df_NPC / df_Neuro are the gene_id and gene_name
# annotation columns added above; every other column is a sample.
# The pseudocount goes INSIDE the logarithm: log2(x + 1) maps a zero count to
# 0, whereas the previous log2(x) + 1 produced -Inf for every zero count.
# NOTE(review): df_NPC / df_Neuro were built from assay(), not from
# counts(dds, normalized = TRUE), so these look like raw counts -- confirm
# that is what downstream users expect.
annotation_cols <- c("gene_id", "gene_name")

npc_sample_cols <- setdiff(colnames(df_NPC), annotation_cols)
tempNPC <- cbind(log2(df_NPC[, npc_sample_cols, drop = FALSE] + 1),
                 df_NPC[, annotation_cols, drop = FALSE])
dim(tempNPC)

fwrite(tempNPC,
       file = paste0(output, 'NPCgeneCounts_log2+1.tsv'),
       quote = FALSE,
       sep = '\t',
       row.names = FALSE)

neuro_sample_cols <- setdiff(colnames(df_Neuro), annotation_cols)
tempNeuro <- cbind(log2(df_Neuro[, neuro_sample_cols, drop = FALSE] + 1),
                   df_Neuro[, annotation_cols, drop = FALSE])
dim(tempNeuro)

fwrite(tempNeuro,
       file = paste0(output, 'NeurogeneCounts_log2+1.tsv'),
       quote = FALSE,
       sep = '\t',
       row.names = FALSE)

In [None]:
# Check the annotation columns and the full table.
head(df_NPC[, c("gene_id", "gene_name")])
head(df_NPC)

## Log Fold Change Adjustment

In [None]:
# Shrink the log2 fold changes for the last coefficient listed by
# resultsNames(), using the apeglm estimator.
resNPC <- lfcShrink(
    ddsNPC,
    coef = utils::tail(resultsNames(ddsNPC), n = 1),
    type = 'apeglm'
)

In [None]:
resNeuro <- lfcShrink(
    ddsNeuro,
    coef = utils::tail(resultsNames(ddsNeuro), n = 1),
    type = 'apeglm'
)

In [None]:
resNPC

In [None]:
resNeuro

In [None]:
# filter(resNeuro$padj <0.05)
# filter(resNeuro2$padj <0.05)

In [None]:
# Rows x columns of the genes with adjusted p-value below 0.1, per cell type.
# resNeuro %>% as.data.frame() %>% filter(pvalue < 0.05)
dim(filter(as.data.frame(resNPC), padj < 0.1))

dim(filter(as.data.frame(resNeuro), padj < 0.1))
#dim(resNeuro %>% as.data.frame() %>% filter(pvalue < 0.05))

# Results Plots

In [None]:
# MA Plots
# Each result set is drawn twice: with default y-limits and restricted to [-3, 3].
DESeq2::plotMA(resNPC)

DESeq2::plotMA(resNPC, ylim= c(-3,3))

DESeq2::plotMA(resNeuro)

DESeq2::plotMA(resNeuro, ylim=c(-3,3))


In [None]:
#Dispersion Estimate Plots
# One dispersion plot per fitted dataset.
plotDispEsts(ddsNPC)

plotDispEsts(ddsNeuro)


In [None]:
#Volcano Plots

# Adding Gene Names to Results Tables

In [None]:
head(resNPC)

In [None]:
# Store the gene IDs (row names) as a column so they can serve as a join key.
resNPC$gene_id <- rownames(resNPC)
head(resNPC)

In [None]:
resNeuro$gene_id <- rownames(resNeuro)
head(resNeuro)

In [None]:
# Plain data.frame copy of the NPC results with gene names attached.
resNPC_labeled <- resNPC %>%
    as.data.frame() %>%
    left_join(namesTable, by = "gene_id")
head(resNPC)
head(resNPC_labeled)

In [None]:
# Plain data.frame copy of the neuron results with gene names attached.
resNeuro_labeled <- resNeuro %>%
    as.data.frame() %>%
    left_join(namesTable, by = "gene_id")
head(resNeuro)
head(resNeuro_labeled)

In [None]:
# Put the gene IDs back as row names on the labelled tables.
rownames(resNPC_labeled) <- resNPC_labeled[["gene_id"]]
head(resNPC_labeled)

In [None]:
rownames(resNeuro_labeled) <- resNeuro_labeled[["gene_id"]]
head(resNeuro_labeled)

In [None]:
# Volcano plot of the NPC results: log2 fold change on x, unadjusted p-value
# on y, points labelled with gene names.
EnhancedVolcano(resNPC_labeled,
                lab = resNPC_labeled$gene_name,
                x = 'log2FoldChange',
                y = 'pvalue',
                title = "NPC Hetero vs WT"
               )

In [None]:
# Same plot for the neuron results.
EnhancedVolcano(resNeuro_labeled,
                lab = resNeuro_labeled$gene_name,
                x = 'log2FoldChange',
                y = 'pvalue',
                title = "Neuro Hetero vs WT"
               )

# Comparison of t-statistics between NPCs and Neurons

In [None]:
## Calculate t-stats

In [None]:
head(resNPC_labeled)

In [None]:
# Build a per-cell-type table of differential-expression statistics.
#
# Adds a `t_stat` column (log2FoldChange / lfcSE), keeps the columns named in
# `cols` (in that order) and appends "_<cell_type>" to the name of every kept
# column after the first two, which are treated as identifier columns and
# left unsuffixed so they can be used as join keys.
#
# Arguments:
#   data      data.frame with at least `log2FoldChange` and `lfcSE` columns.
#   cell_type string appended (after "_") to the non-identifier column names.
#   cols      names of the columns to keep; names that are not present in
#             `data` are skipped with a warning.
#
# Returns: a data.frame with the selected, renamed columns.
#
# NOTE(review): when `data` comes from lfcShrink(type = "apeglm"), this ratio
# uses the shrunken estimate and its reported error rather than DESeq2's Wald
# statistic -- confirm that this is the intended "t-statistic".
t_stat_selection <- function(data, cell_type, cols = c("gene_id","gene_name", "t_stat", "baseMean", "pvalue", "padj", "log2FoldChange", "lfcSE")){
    needed <- c("log2FoldChange", "lfcSE")
    absent <- setdiff(needed, colnames(data))
    if (length(absent) > 0) {
        stop("`data` is missing required column(s): ",
             paste(absent, collapse = ", "), call. = FALSE)
    }

    data[["t_stat"]] <- data[["log2FoldChange"]] / data[["lfcSE"]]

    # Unknown names are reported and skipped rather than aborting.
    unknown <- setdiff(cols, colnames(data))
    if (length(unknown) > 0) {
        warning("Unknown columns: ", paste(unknown, collapse = ", "),
                call. = FALSE)
    }
    new <- data[, unique(cols[cols %in% colnames(data)]), drop = FALSE]

    # Positions after the first two. tail(..., -2) is empty when fewer than
    # three columns were kept, whereas 3:length(...) counted backwards and
    # made the renaming fail.
    to_suffix <- utils::tail(seq_len(ncol(new)), -2)
    if (length(to_suffix) > 0) {
        colnames(new)[to_suffix] <- paste0(colnames(new)[to_suffix], "_", cell_type)
    }
    new
    }

In [None]:
# How many genes have a missing t-statistic in each cell type?
table(is.na(t_stat_selection(resNPC_labeled, "npc")[["t_stat_npc"]]))
table(is.na(t_stat_selection(resNeuro_labeled, "neuro")[["t_stat_neuro"]]))

In [None]:
resNPC_labeled %>% t_stat_selection("npc") %>% head()

In [None]:
resNeuro_labeled %>% t_stat_selection("neuro") %>% head()

In [None]:
# One row per gene tested in both cell types, with the NPC and neuron
# statistics side by side.
t_stat_info <- t_stat_selection(resNPC_labeled, "npc") %>%
    inner_join(t_stat_selection(resNeuro_labeled, "neuro"), by = "gene_id")

In [None]:
head(t_stat_info)
dim(t_stat_info)
tail(t_stat_info)

In [None]:
# Sizes of the two input tables, for comparison with the joined table.
dim(resNPC_labeled)
dim(resNeuro_labeled)

In [None]:
# NPC vs neuron scatter plots over all genes present in both result sets.
# The aesthetics are passed to ggplot() directly; the earlier
# `+ (mapping = aes(...))` form drew the same plot but also assigned a global
# variable called `mapping` as a side effect. The smoothing formula is spelled
# out so geom_smooth() does not have to report its default.
ggplot(t_stat_info, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
    labs(title = "t-statistics") +
    theme_bw()

In [None]:
ggplot(t_stat_info, aes(baseMean_npc, baseMean_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(0, 1.2e05), ylim = c(0, 1.2e05)) +
    labs(title = "Base Mean Values") +
    theme_bw()

In [None]:
ggplot(t_stat_info, aes(log2FoldChange_npc, log2FoldChange_neuro)) +
    #geom_smooth(method=lm) +
    geom_point() +
    #coord_cartesian(xlim = c(0,1.2e05), ylim=c(0,1.2e05)) +
    labs(title = "log2FoldChange") +
    theme_bw()

In [None]:
ggplot(t_stat_info, aes(lfcSE_npc, lfcSE_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    #coord_cartesian(xlim = c(0,1.2e05), ylim=c(0,1.2e05)) +
    labs(title = "Log Fold Change Standard Errors") +
    theme_bw()

In [None]:
# Genes with an unadjusted p-value below 0.05 in BOTH cell types.
sig_plot_data <- t_stat_info %>% dplyr::filter(pvalue_npc < 0.05) %>% dplyr::filter(pvalue_neuro < 0.05)

In [None]:
# The subtitle now states the filter that was actually applied (p < 0.05 in
# both cell types); it previously read "p >0.05 for Neurons".
ggplot(sig_plot_data, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(-4, 6), ylim = c(-4, 6)) +
    labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics", subtitle = "p < 0.05 for NPC AND p <0.05 for Neurons") +
    theme_bw() +
    theme(text = element_text(size = 20))

In [None]:
#sig_plot_data %>% dplyr::filter(t_stat_npc >2) %>% dplyr::filter(t_stat_neuro > 2)

In [None]:
head(sig_plot_data)
dim(sig_plot_data)

# Gene Exploration for Drug Discovery Team

In [None]:
# Gene list supplied by the drug discovery team. The sheet is read without a
# header row, so its columns are referred to below as X2 and X3.
ddgenes <- read.xlsx("../../../metadata/_h/UPR_PAHS-089Y genelist (1).xlsx", startRow = 1, colNames = FALSE)
head(ddgenes)
dim(ddgenes)

In [None]:
# Rows of the joined statistics table whose gene name appears in column X2.
# `gene_name.x` is the gene_name column contributed by the NPC side of the join.
ddgenes_results <- t_stat_info %>% dplyr::filter(gene_name.x %in% ddgenes$X2)

In [None]:
head(ddgenes_results)
dim(ddgenes_results)

In [None]:
# Same look-up using column X3 of the gene list.
ddgenes_results2 <- t_stat_info %>% dplyr::filter(gene_name.x %in% ddgenes$X3)
head(ddgenes_results2)
dim(ddgenes_results2)

In [None]:
ddgenes$X3

In [None]:
# Check whether either of these symbols occurs in the statistics table.
"CHOP" %in% t_stat_info$gene_name.x
"GADD153" %in% t_stat_info$gene_name.x

In [None]:
#There are 3 genes which are not found in the results table, but are in the list from drug discovery

# Rows of the gene list whose X2 entry has no match in the statistics table.
ddgenes[!(ddgenes$X2 %in% t_stat_info$gene_name.x),]

In [None]:
# t-statistics of the drug discovery genes only.
# Duplicated labs()/theme_bw()/theme() layers were collapsed to the final
# values, and the aesthetics moved into ggplot() so that no stray global
# `mapping` variable is created.
ggplot(ddgenes_results, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
    labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics") +
    theme_bw() +
    theme(text = element_text(size = 20))

In [None]:
# All genes, with the drug discovery genes overplotted in red.
ggplot(t_stat_info, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
    geom_point(data = ddgenes_results, aes(t_stat_npc, t_stat_neuro), color = 'red') +
    labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics") +
    theme_bw() +
    theme(text = element_text(size = 20))

In [None]:
# Genes significant in both cell types, drug discovery genes in red.
# The subtitle matches the filter behind sig_plot_data (p < 0.05 in both cell
# types); it previously read "p >0.05 for Neurons".
ggplot(sig_plot_data, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    coord_cartesian(xlim = c(-4, 6), ylim = c(-4, 6)) +
    labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics", subtitle = "p < 0.05 for NPC AND p <0.05 for Neurons in all genes", caption = "red = drug discovery gene list\n") +
    theme_bw() +
    theme(text = element_text(size = 20)) +
    geom_point(data = ddgenes_results, aes(t_stat_npc, t_stat_neuro), color = 'red')

In [None]:
# Drug discovery genes with an unadjusted p-value below 0.05 in both cell types.
ddgenes_sig <- ddgenes_results %>% dplyr::filter(pvalue_npc < 0.05) %>% dplyr::filter(pvalue_neuro < 0.05)
# Inspect the table that was just created (this previously showed the
# dimensions of the unfiltered gene list, `ddgenes`).
dim(ddgenes_sig)

In [None]:
ggplot(ddgenes_sig, aes(t_stat_npc, t_stat_neuro)) +
    geom_smooth(method = "lm", formula = y ~ x) +
    geom_point() +
    #coord_cartesian(xlim = c(-4,6), ylim=c(-4,6))+
    labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics", subtitle = "p < 0.05 for NPC AND p <0.05 for Neurons") +
    theme_bw() +
    theme(text = element_text(size = 20))

In [None]:
head(ddgenes_results)

In [None]:
# Drug discovery genes with the smallest NPC p-values first.
ddgenes_results %>% arrange(pvalue_npc) %>% head()

In [None]:
# NPC vs neuron unadjusted p-values; reference lines mark 0.05 on each axis.
ggplot(data = ddgenes_results, mapping = aes(x = pvalue_npc, y = pvalue_neuro)) +
    geom_point() +
    geom_hline(aes(yintercept = 0.05)) +
    geom_vline(aes(xintercept = 0.05)) +
    labs(title = "NPC vs Neuron p-values (unadjusted)", x = "NPC p-values", y = "Neuron p-values") +
    theme_bw() +
    theme(text = element_text(size = 20))

In [None]:
# Drug discovery genes with NPC p-value at or below 0.05, most significant first.
arrange(filter(ddgenes_results, pvalue_npc <= 0.05), pvalue_npc)

In [None]:
# Drug discovery genes with neuron p-value at or below 0.05, most significant first.
arrange(filter(ddgenes_results, pvalue_neuro <= 0.05), pvalue_neuro)

In [None]:
#Manually imported list from DD powerpoint

# Gene symbols typed in by hand from the drug discovery PowerPoint.
dd_pp_genes <- c('CHOP', "GADD34", "ATF4", "PERK", "BIP", "EDEM1", "p58ipk", "IRE1a", "XBP1", "HRD1", "CANX", "UGGT1", "GANAB")

In [None]:
# Rows of the statistics table whose gene name matches one of those symbols
# exactly (matching is case-sensitive).
dd_pp_genes_res <- t_stat_info %>% dplyr::filter(gene_name.x %in% dd_pp_genes)
dd_pp_genes_res

In [None]:
# TRUE where a PowerPoint symbol also appears in column X2 of the gene list.
# NOTE(review): the name says "unused", but this vector (and the subset taken
# two cells below) selects the symbols that ARE in the list -- confirm whether
# a negation was intended.
pp_unused_genes <- dd_pp_genes %in% ddgenes$X2
pp_unused_genes
# TRUE where a PowerPoint symbol appears in the statistics table.
dd_pp_genes %in% t_stat_info$gene_name.x


In [None]:
dd_pp_genes[pp_unused_genes]

In [None]:
head(dd_pp_genes)

In [None]:
# NPC vs neuron unadjusted p-values for the PowerPoint genes found in the
# statistics table; reference lines mark 0.05 on each axis.
ggplot(dd_pp_genes_res, aes(x = pvalue_npc, y = pvalue_neuro)) +
geom_point() +
geom_hline(aes(yintercept = 0.05)) +
geom_vline(aes(xintercept = 0.05)) + 
labs(x = "NPC p-values", y = "Neuron p-values", title = "NPC vs Neuron p-values (unadjusted)") +
theme_bw() +
theme(text= element_text(size = 20))

# Export figures and data tables

In [None]:
# From here on, exports go to the current directory.
output <- "./"

# Genes significant (unadjusted p < 0.05) in both cell types. The path is now
# built from `output` like every other export, so changing `output` moves
# this file too.
write.table(x = sig_plot_data, file = paste0(output, "all_significant_genes.tsv"), sep = "\t", row.names = FALSE)

# The same genes split by the sign of the two t-statistics:
#   quad_01: NPC >= 0, neuron >= 0      quad_02: NPC <  0, neuron >= 0
#   quad_03: NPC >= 0, neuron <  0      quad_04: NPC <  0, neuron <  0
write.table(x = sig_plot_data %>% dplyr::filter(t_stat_npc >= 0) %>% dplyr::filter(t_stat_neuro >= 0), file = paste0(output, "quad_01.tsv"), sep = '\t', row.names = FALSE)
write.table(x = sig_plot_data %>% dplyr::filter(t_stat_npc < 0) %>% dplyr::filter(t_stat_neuro >= 0), file = paste0(output, "quad_02.tsv"), sep = '\t', row.names = FALSE)
write.table(x = sig_plot_data %>% dplyr::filter(t_stat_npc >= 0) %>% dplyr::filter(t_stat_neuro < 0), file = paste0(output, "quad_03.tsv"), sep = '\t', row.names = FALSE)
write.table(x = sig_plot_data %>% dplyr::filter(t_stat_npc < 0) %>% dplyr::filter(t_stat_neuro < 0), file = paste0(output, "quad_04.tsv"), sep = '\t', row.names = FALSE)

# Full NPC/neuron statistics table.
write.table(x = t_stat_info, file = paste0(output, "t_statistics_table.tsv"), sep = '\t', row.names = FALSE)

In [None]:
#Drug Discovery Products

# Gene-list rows without a match in the statistics table, and the drug
# discovery genes with unadjusted p <= 0.05 in neurons / NPCs.
write.table(x = ddgenes[!(ddgenes$X2 %in% t_stat_info$gene_name.x),], file = paste0(output,"dd_notincluded.csv"), sep = ",", row.names = FALSE)
write.table(x = ddgenes_results %>% filter(pvalue_neuro <= 0.05) %>% arrange(pvalue_neuro), file = paste0(output,"ddneuro.csv"), sep = ",", row.names = FALSE)
write.table(x = ddgenes_results %>% filter(pvalue_npc <= 0.05) %>% arrange(pvalue_npc), file = paste0(output,"ddnpc.csv"), sep = ",", row.names = FALSE)

# Two-page PDF: unadjusted, then adjusted p-values.
# Each ggplot is wrapped in print(): a ggplot object is only drawn when it is
# printed, and relying on top-level autoprinting leaves the PDF empty when the
# code is run through source() or from inside a function or loop.
pdf(paste0(output, 'ddgenes_scatter.pdf'))
print(
    ggplot(ddgenes_results, aes(x = pvalue_npc, y = pvalue_neuro)) +
        geom_point() +
        geom_hline(aes(yintercept = 0.05)) +
        geom_vline(aes(xintercept = 0.05)) +
        labs(x = "NPC p-values", y = "Neuron p-values", title = "NPC vs Neuron p-values (unadjusted)") +
        theme_bw() +
        theme(text = element_text(size = 20))
)

print(
    ggplot(ddgenes_results, aes(x = padj_npc, y = padj_neuro)) +
        geom_point() +
        geom_hline(aes(yintercept = 0.05)) +
        geom_vline(aes(xintercept = 0.05)) +
        labs(x = "NPC adjusted p-values", y = "Neuron adjusted p-values", title = "NPC vs Neuron p-values (adjusted)") +
        theme_bw() +
        theme(text = element_text(size = 20))
)
dev.off()

In [None]:
# Three-page PDF of t-statistic comparisons involving the drug discovery genes.
# Plots are print()ed explicitly so the pages are written however the code is
# run, and the aesthetics are passed to ggplot() directly instead of through
# `+ (mapping = aes(...))`, which also assigned a global `mapping` variable.
pdf(paste0(output, 'ddgenes_t_stats.pdf'))

# Page 1: drug discovery genes only.
print(
    ggplot(ddgenes_results, aes(t_stat_npc, t_stat_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
        labs(title = "Comparison of t-statistics", subtitle = "Drug Discovery Genes Only", x = "NPC t-statistics", y = "Neuron t-statistics") +
        theme_bw() +
        theme(text = element_text(size = 20))
)

# Page 2: every gene in t_stat_info, drug discovery genes in red.
# NOTE(review): the title says "All Significant Genes" although this page
# plots the unfiltered t_stat_info table -- confirm the intended wording.
print(
    ggplot(t_stat_info, aes(t_stat_npc, t_stat_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
        labs(title = "Comparison of All Significant Genes", subtitle = "All Drug Discovery Genes", x = "NPC t-statistics", y = "Neuron t-statistics", caption = "red = All drug discovery genes") +
        theme_bw() +
        geom_point(data = ddgenes_results, aes(t_stat_npc, t_stat_neuro), color = 'red') +
        theme(text = element_text(size = 20))
)

# Page 3: genes with p < 0.05 in both cell types, drug discovery genes in red.
print(
    ggplot(sig_plot_data, aes(t_stat_npc, t_stat_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(-4, 6), ylim = c(-4, 6)) +
        labs(title = "Comparison of All Significant Genes", x = "NPC t-statistics", y = "Neuron t-statistics", subtitle = "p < 0.05 for NPC AND Neurons in all genes", caption = "red = All drug discovery genes") +
        theme_bw() +
        theme(text = element_text(size = 20)) +
        geom_point(data = ddgenes_results, aes(t_stat_npc, t_stat_neuro), color = 'red')
)

dev.off()

In [None]:
# Export the diagnostic and comparison figures as PDFs.
# plotMA() and plotDispEsts() are called as-is between pdf() and dev.off().
# EnhancedVolcano() and ggplot() return plot objects that are only drawn when
# printed, so those are wrapped in print() to make sure each PDF gets its
# page however the code is run.

# MA plots: default y-limits, then restricted to [-3, 3].
pdf(paste0(output,'NPC_MAplot_padj.pdf'))
DESeq2::plotMA(resNPC)
DESeq2::plotMA(resNPC, ylim = c(-3, 3))
dev.off()

pdf(paste0(output,'Neuro_MAplot_padj.pdf'))
DESeq2::plotMA(resNeuro)
DESeq2::plotMA(resNeuro, ylim = c(-3, 3))
dev.off()

# Dispersion estimates.
pdf(paste0(output,'NPC_Disp_plot.pdf'))
plotDispEsts(ddsNPC)
dev.off()

pdf(paste0(output,'Neuro_Disp_plot.pdf'))
plotDispEsts(ddsNeuro)
dev.off()

# Volcano plots (unadjusted p-values on the y axis).
pdf(paste0(output, 'NPC_Volcano_plot.pdf'), height = 12, width = 12)
print(
    EnhancedVolcano(resNPC_labeled,
                    lab = resNPC_labeled$gene_name,
                    x = 'log2FoldChange',
                    y = 'pvalue',
                    title = "NPC Hetero vs WT"
                   )
)
dev.off()

pdf(paste0(output, 'Neuro_Volcano_plot.pdf'), height = 12, width = 12)
print(
    EnhancedVolcano(resNeuro_labeled,
                    lab = resNeuro_labeled$gene_name,
                    x = 'log2FoldChange',
                    y = 'pvalue',
                    title = "Neuro Hetero vs WT"
                   )
)
dev.off()

# NPC vs neuron comparisons over all genes present in both result sets.
pdf(paste0(output, 't_statistics_comparison.pdf'), height = 12, width = 12)
print(
    ggplot(t_stat_info, aes(t_stat_npc, t_stat_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(-4, 8), ylim = c(-4, 8)) +
        labs(title = "t-statistics") +
        theme_bw()
)
dev.off()

pdf(paste0(output, 'BaseMeans_comparison.pdf'), height = 12, width = 12)
print(
    ggplot(t_stat_info, aes(baseMean_npc, baseMean_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(0, 1.2e05), ylim = c(0, 1.2e05)) +
        labs(title = "Base Mean Values") +
        theme_bw()
)
dev.off()

pdf(paste0(output, 'Log2FoldChanges_comparison.pdf'), height = 12, width = 12)
print(
    ggplot(t_stat_info, aes(log2FoldChange_npc, log2FoldChange_neuro)) +
        #geom_smooth(method=lm) +
        geom_point() +
        #coord_cartesian(xlim = c(0,1.2e05), ylim=c(0,1.2e05)) +
        labs(title = "log2FoldChange") +
        theme_bw()
)
dev.off()

pdf(paste0(output, 'Log2FoldChangesSE_comparison.pdf'), height = 12, width = 12)
print(
    ggplot(t_stat_info, aes(lfcSE_npc, lfcSE_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        #coord_cartesian(xlim = c(0,1.2e05), ylim=c(0,1.2e05)) +
        labs(title = "Log Fold Change Standard Errors") +
        theme_bw()
)
dev.off()

# Genes with p < 0.05 in both cell types. The subtitle now matches that
# filter; it previously read "p >0.05 for Neurons".
pdf(paste0(output, 't_stats_Significant_Genes.pdf'), height = 12, width = 12)
print(
    ggplot(sig_plot_data, aes(t_stat_npc, t_stat_neuro)) +
        geom_smooth(method = "lm", formula = y ~ x) +
        geom_point() +
        coord_cartesian(xlim = c(-4, 6), ylim = c(-4, 6)) +
        labs(title = "Comparison of t-statistics", x = "NPC t-statistics", y = "Neuron t-statistics", subtitle = "p < 0.05 for NPC AND p <0.05 for Neurons") +
        theme_bw() +
        theme(text = element_text(size = 20))
)
dev.off()

In [None]:
# Close a graphics device left open by an interrupted export cell, if there
# is one. A bare dev.off() raises an error when only the null device (number
# 1) is current, which would stop a top-to-bottom run of the notebook here.
if (dev.cur() > 1L) {
    dev.off()
}

In [None]:
#Exporting results tables

# Labelled result tables, one CSV per cell type. Row names (set to the gene
# IDs above) are written as the first column; missing values appear as "NA".
write.csv(resNPC_labeled, file=paste0(output, "NPC_results.csv"), na="NA")

write.csv(resNeuro_labeled, file=paste0(output, "Neuro_results.csv"), na="NA")

In [None]:
# https://support.bioconductor.org/p/106253/ #To convert Ensembl IDs to GeneIDs

In [None]:
# Record the R version and package versions used for this run.
sessionInfo()