# Patient ID 7669

In [1]:
# original path from server "/projects/ps-gleesonlab8/User/chchung/Interneuron/20221223_7669_Ampliseq_QC/20230101_QC/final_summary_anno3.txt"
library(ggplot2)
library(dplyr)
library(tidyr)
library(ggcorrplot)
library(gplots)
library(reshape2)
library(corrplot)
library(grid)
library(pheatmap)
library(ggdendro)
library(heatmaply)
library(devtools) 
library(ComplexHeatmap) #ComplexHeatmap cite "Gu, Z. (2022) Complex Heatmap Visualization, iMeta. DOI: 10.1002/imt2.43."
library(circlize)
library(dendsort)

# Both input tables are read from one local directory; change `data_dir`
# when running on another machine.
data_dir <- "/Users/rahulnedunuri/Documents/GleesonLab/Mosaic Variant Projects"

# Tab-separated summary table with a header row.
filterdata <- read.table(
  file.path(data_dir, "final_summary_anno3.txt"),
  header = TRUE, sep = "\t"
)
# Comma-separated variant annotation table with a header row.
annotated <- read.csv(
  file.path(data_dir, "780_variant_annotation_COUPTF2_TBR1_DLX1_CBH.csv"),
  header = TRUE, sep = ","
)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



Attaching package: 'gplots'


The following object is masked from 'package:stats':

    lowess



Attaching package: 'reshape2'


The following object is masked from 'package:tidyr':

    smiths


corrplot 0.92 loaded

Loading required package: plotly


Attaching package: 'plotly'


The following object is masked from 'package:ggplot2':

    last_plot


The following object is masked from 'package:stats':

    filter


The following object is masked from 'package:graphics':

    layout


Loading required package: viridis

Loading required package: viridisLite


Welcome to heatmaply version 1.4.2

Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.

The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions an

In [2]:
# List the column names of both input tables.
colnames(filterdata)
colnames(annotated)

## Filter for Mosaic Variants Shared by WGS and Amplicon Sequencing

In [3]:
# Keep only rows flagged as shared between WGS and amplicon sequencing
# (Mosaic_shared_btw_WGS_Amp == 1).

# Unique variant count including the JGG control rows. Count the variant key
# column: length() of a data frame would return its number of columns.
confirmedInclCtrl <- subset(filterdata, Mosaic_shared_btw_WGS_Amp == 1)
totalConfVarsInclCtrl <- length(unique(confirmedInclCtrl$CHROM.POS.REF.ALT))

# Confirmed variants with the JGG control rows removed.
confirmedVars <- subset(filterdata, Mosaic_shared_btw_WGS_Amp == 1 & Organ != "JGG")

numTotalVars <- length(unique(confirmedVars$CHROM.POS.REF.ALT))
pretext <- "Confirmed total variants (WGS + Amp): "
print(paste(pretext, numTotalVars))

[1] "Confirmed total variants (WGS + Amp):  780"


## Sorting of Confirmed Mosaic Variants by Region and MAF_CI within Mosaic Threshold

In [4]:
# Mosaic variants only in the brain
# Brain : CTX, BG, THAL, HIP, POA (preoptic area), OLF, CB (cerebellum)
# Kidney : ADRENAL, KIDNEY
# Other: HEART, LIVER, SKIN
# Exclude JGG variants (control); confirmedVars already has them removed.

allOrgans <- unique(confirmedVars$Organ)
#print("All regions: ")
#print(allOrgans)

## Upper and lower MAF bounds for a variant to count as mosaic.
Mosaic_UPPER <- 0.4562841
Mosaic_LOWER <- 0.002360687

brainOrgans <- c("CTX", "BG", "THAL", "HIP", "POA", "CB", "OLF")

# Number of distinct variants (CHROM.POS.REF.ALT keys) in a table of calls.
count_unique_variants <- function(vars) {
  length(unique(vars$CHROM.POS.REF.ALT))
}

# Brain rows whose confidence interval lies inside the mosaic range and whose
# NORMAL_LOWER_CI is below the lower bound. Every table below is a subset of
# brainVars, so this filter does not need to be applied again.
brainVars <- subset(
  confirmedVars,
  Organ %in% brainOrgans &
    LOWER_CI > Mosaic_LOWER &
    UPPER_CI < Mosaic_UPPER &
    NORMAL_LOWER_CI < Mosaic_LOWER
)
numBrainUniqueVars <- count_unique_variants(brainVars)

leftHemisphereVars <- subset(brainVars, Hemisphere == "L")
numLeftHVars <- count_unique_variants(leftHemisphereVars)

rightHemisphereVars <- subset(brainVars, Hemisphere == "R")
numRightHVars <- count_unique_variants(rightHemisphereVars)

cortexVars <- subset(brainVars, Organ == "CTX")
numCortexVars <- count_unique_variants(cortexVars)

basalGangliaVars <- subset(brainVars, Organ == "BG")
numBgVars <- count_unique_variants(basalGangliaVars)

dlx1Vars <- subset(brainVars, Cell_Type == "DLX1")
numDlx1Vars <- count_unique_variants(dlx1Vars)

tbr1Vars <- subset(brainVars, Cell_Type == "TBR1")
numTbr1Vars <- count_unique_variants(tbr1Vars)

dlx1_couptf2Vars <- subset(brainVars, Cell_Type %in% c("DLX1", "COUPTF2"))
numdlx1_couptf2Vars <- count_unique_variants(dlx1_couptf2Vars)

print(paste(numTotalVars, "total confirmed unique variants"))
print(paste(numBrainUniqueVars, "unique variants in brain"))
print(paste(numLeftHVars, "unique variants in left hemisphere"))
print(paste(numRightHVars, "unique variants in right hemisphere"))
print(paste(numCortexVars, "unique variants in cortex"))
print(paste(numBgVars, "unique variants in basal ganglia"))
print(paste(numDlx1Vars, "unique variants in DLX1 cells"))
print(paste(numTbr1Vars, "unique variants in TBR1 cells"))
print(paste(numdlx1_couptf2Vars, "unique variants in DLX1+COUPTFII cells (GABAergic interneurons)"))


[1] "780 total confirmed unique variants"
[1] "428 unique variants in brain"
[1] "264 unique variants in left hemisphere"
[1] "282 unique variants in right hemisphere"
[1] "201 unique variants in cortex"
[1] "178 unique variants in basal ganglia"
[1] "190 unique variants in DLX1 cells"
[1] "156 unique variants in TBR1 cells"
[1] "194 unique variants in DLX1+COUPTFII cells (GABAergic interneurons)"


## Bar Graph of Number of Unique Mosaic Variants by Region

In [5]:

# Unique-variant counts per region and per cell type, paired with their labels.
regions <- c("Brain", "Cortex", "Total", "Left H", "Right H", "Basal Ganglia")
variantCounts <- c(numBrainUniqueVars, numCortexVars, numTotalVars,
                   numLeftHVars, numRightHVars, numBgVars)
cellTypes <- c("DLX1", "TBR1", "DLX1+COUPTF2")
variantCountsByCellType <- c(numDlx1Vars, numTbr1Vars, numdlx1_couptf2Vars)

variantsByRegion <- data.frame(regions = regions, variantCounts = variantCounts)
variantsByCellType <- data.frame(cellTypes = cellTypes,
                                 variantCountsByCellType = variantCountsByCellType)

# Shared look for both charts: white panel, no grid lines, visible axis lines.
countBarTheme <- theme(
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_rect(fill = "white"),
  axis.line = element_line()
)

# Bar height is the count itself; the count is also written above each bar.
# The plots are stored in variables and are not printed by this cell.
regionCount <- ggplot(variantsByRegion, aes(x = regions, y = variantCounts)) +
  geom_col() +
  countBarTheme +
  geom_text(
    aes(label = variantCounts),
    position = position_dodge(0.9), vjust = -0.5, color = "#940303"
  )

cellTypeCount <- ggplot(variantsByCellType, aes(x = cellTypes, y = variantCountsByCellType)) +
  geom_col() +
  countBarTheme +
  geom_text(
    aes(label = variantCountsByCellType),
    position = position_dodge(0.9), vjust = -0.5, color = "#940303"
  )



## Correlation matrix of Variant x Variant using ComplexHeatmap

In [6]:
#ComplexHeatmap plot (working)
#PEARSON
# Variant x variant Pearson correlation of sqrt(MAF), drawn twice (Euclidean
# and Manhattan clustering distance) with per-variant row annotations.

# Annotation rows for variants reported in at least two samples, and the
# matching confirmed calls.
varsConfirmedIn2Sample <- subset(annotated, sample_number_presented >= 2)
confirmed2SampleVars <- subset(confirmedVars, CHROM.POS.REF.ALT %in% varsConfirmedIn2Sample$variant_list)

onlyMAFofVars <- split(sqrt(confirmed2SampleVars$MAF), confirmed2SampleVars$CHROM.POS.REF.ALT) #now using sqrt(MAF) 4/14/23
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars) #converts to matrix format with col variants, row MAF
pearson_corr_matrix <- round(cor(df_encoded, method = "pearson", use = "pairwise.complete.obs"), 4)

LR_colors <- colorRamp2(c(-1, 0, 1), c("#d68426", "#ffffff", "#2a1abf"))

# Legend settings per annotation track. Currently unused: the
# annotation_legend_param argument below is commented out.
legend_labels <- c(
  Bulk = "Bulk", ADRENAL = "Adrenal", HEART = "Heart", LIVER = "Liver",
  KIDNEY = "Kidney", SKIN = "Skin", Brain_only = "Brain Only",
  CB = "Cerebellum", CTX = "Cortex", POA = "Preoptic Area",
  BG = "Basal Ganglia", THAL = "Thalamus", HIP = "Hippocampus",
  OLF = "Olfactory Bulb", CTX_only = "CTX Only", CTX_BG_only = "CTX+BG Only",
  BG_only = "BG Only", THAL_only = "Thalamus Only", L = "Left", R = "Right",
  LR_only = "Left Only", # placeholder; replaced by the two-level entry below
  NeuN = "NeuN", IN_only = "Interneuron Only", COUPTF2 = "COUPTF2",
  COUP_only = "COUPTF2 only", TBR1 = "TBR1", TBR_only = "TBR1 only",
  TBR_BR = "TBR1 Brain only", DLX1 = "DLX1", DLX_BR = "DLX1 Brain only"
)
legendDets <- lapply(legend_labels, function(lbl) {
  list(title = "", at = c(1), color_bar = "discrete", labels = unname(lbl))
})
# LR_only is the only track with two levels (-1 = right only, 1 = left only).
legendDets[["LR_only"]] <- list(
  title = "",
  at = c(-1, 1),
  color_bar = "discrete",
  labels = c("Right Only", "Left Only")
)

# NOTE(review): annotation rows follow the order of column X, while the
# correlation matrix follows the grouping order produced by split() above.
# Confirm both list the same variants in the same order.
varsConfirmedIn2Sample <- varsConfirmedIn2Sample[order(varsConfirmedIn2Sample$X), ]

left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  Bulk = varsConfirmedIn2Sample$Bulk,
  ADRENAL = varsConfirmedIn2Sample$ADRENAL,
  HEART = varsConfirmedIn2Sample$HEART,
  LIVER = varsConfirmedIn2Sample$LIVER,
  KIDNEY = varsConfirmedIn2Sample$KIDNEY,
  SKIN = varsConfirmedIn2Sample$SKIN,
  Brain_only = varsConfirmedIn2Sample$Brain_only,
  CB = varsConfirmedIn2Sample$CB,
  CTX = varsConfirmedIn2Sample$CTX,
  POA = varsConfirmedIn2Sample$POA,
  BG = varsConfirmedIn2Sample$BG,
  THAL = varsConfirmedIn2Sample$THAL,
  HIP = varsConfirmedIn2Sample$HIP,
  OLF = varsConfirmedIn2Sample$OLF,
  CTX_only = varsConfirmedIn2Sample$CTX_only,
  CTX_BG_only = varsConfirmedIn2Sample$CTX_BG_only,
  BG_only = varsConfirmedIn2Sample$BG_only,
  THAL_only = varsConfirmedIn2Sample$THAL_only,
  L = varsConfirmedIn2Sample$L,
  R = varsConfirmedIn2Sample$R,
  # Left_only minus Right_only, coloured by LR_colors over -1..1.
  LR_only = varsConfirmedIn2Sample$Left_only - varsConfirmedIn2Sample$Right_only,
  NeuN = varsConfirmedIn2Sample$NeuN,
  IN_only = varsConfirmedIn2Sample$IN_only,
  COUPTF2 = varsConfirmedIn2Sample$COUPTF2,
  COUP_only = varsConfirmedIn2Sample$COUP_only,
  TBR1 = varsConfirmedIn2Sample$TBR1,
  TBR_only = varsConfirmedIn2Sample$TBR_only,
  TBR_BR = varsConfirmedIn2Sample$TBR_BR,
  DLX1 = varsConfirmedIn2Sample$DLX1,
  DLX_BR = varsConfirmedIn2Sample$DLX_BR,
  col = list(
    LR_only = LR_colors,
    COUP_only = colorRamp2(c(0, 1), c("#ffffff", "black")),
    IN_only = colorRamp2(c(0, 1), c("#ffffff", "darkgreen")),
    TBR_only = colorRamp2(c(0, 1), c("#ffffff", "purple")),
    TBR_BR = colorRamp2(c(0, 1), c("#ffffff", "#e58f9d")),
    DLX_BR = colorRamp2(c(0, 1), c("#ffffff", "maroon"))
  )
) #,
#annotation_legend_param = legendDets)

# pdf() takes its page size in inches. The original passed grid
# unit(16, "cm") / unit(8, "cm") objects, whose bare numbers were used as
# inches; plain numbers give the same 16 x 8 inch page.
pdf("220_correlation_7669_Pearson.pdf", width = 16, height = 8)
leftMapUpdate <- Heatmap(
  pearson_corr_matrix,
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1), column_names_gp = gpar(fontsize = 1),
  name = "Correlation",
  left_annotation = left_right_cell_col_ann,
  clustering_method_rows = "complete", clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean", clustering_distance_columns = "euclidean",
  cluster_rows = TRUE, cluster_columns = TRUE,
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
# Explicit draw() so the plot is also written when the code is source()d,
# where top-level auto-printing does not happen.
draw(leftMapUpdate)
dev.off()

# Same matrix and annotation, clustered on Manhattan distance.
pdf("220_correlation_7669_Pearson_Manhattan_Dist.pdf", width = 16, height = 8)
leftMapUpdate <- Heatmap(
  pearson_corr_matrix,
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1), column_names_gp = gpar(fontsize = 1),
  name = "Correlation",
  left_annotation = left_right_cell_col_ann,
  clustering_method_rows = "complete", clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan", clustering_distance_columns = "manhattan",
  cluster_rows = TRUE, cluster_columns = TRUE,
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
draw(leftMapUpdate)
dev.off()

In [7]:
#SPEARMAN
# Variant x variant Spearman correlation heatmap. Reads df_encoded, LR_colors
# and varsConfirmedIn2Sample as left in the session by the Pearson cell; run
# that cell first.

# Legend settings per annotation track. Currently unused: the
# annotation_legend_param argument below is commented out.
legend_labels <- c(
  Bulk = "Bulk", ADRENAL = "Adrenal", HEART = "Heart", LIVER = "Liver",
  KIDNEY = "Kidney", SKIN = "Skin", Brain_only = "Brain Only",
  CB = "Cerebellum", CTX = "Cortex", POA = "Preoptic Area",
  BG = "Basal Ganglia", THAL = "Thalamus", HIP = "Hippocampus",
  OLF = "Olfactory Bulb", CTX_only = "CTX Only", CTX_BG_only = "CTX+BG Only",
  BG_only = "BG Only", THAL_only = "Thalamus Only", L = "Left", R = "Right",
  LR_only = "Left Only", # placeholder; replaced by the two-level entry below
  NeuN = "NeuN", IN_only = "Interneuron Only", COUPTF2 = "COUPTF2",
  COUP_only = "COUPTF2 only", TBR1 = "TBR1", TBR_only = "TBR1 only",
  TBR_BR = "TBR1 Brain only", DLX1 = "DLX1", DLX_BR = "DLX1 Brain only"
)
legendDets <- lapply(legend_labels, function(lbl) {
  list(title = "", at = c(1), color_bar = "discrete", labels = unname(lbl))
})
# LR_only is the only track with two levels (-1 = right only, 1 = left only).
legendDets[["LR_only"]] <- list(
  title = "",
  at = c(-1, 1),
  color_bar = "discrete",
  labels = c("Right Only", "Left Only")
)

spearman_corr_matrix <- round(cor(df_encoded, method = "spearman", use = "pairwise.complete.obs"), 4)

# NOTE(review): annotation rows follow the order of column X; confirm this
# matches the row order of the correlation matrix.
varsConfirmedIn2Sample <- varsConfirmedIn2Sample[order(varsConfirmedIn2Sample$X), ]

left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  Bulk = varsConfirmedIn2Sample$Bulk,
  ADRENAL = varsConfirmedIn2Sample$ADRENAL,
  HEART = varsConfirmedIn2Sample$HEART,
  LIVER = varsConfirmedIn2Sample$LIVER,
  KIDNEY = varsConfirmedIn2Sample$KIDNEY,
  SKIN = varsConfirmedIn2Sample$SKIN,
  Brain_only = varsConfirmedIn2Sample$Brain_only,
  CB = varsConfirmedIn2Sample$CB,
  CTX = varsConfirmedIn2Sample$CTX,
  POA = varsConfirmedIn2Sample$POA,
  BG = varsConfirmedIn2Sample$BG,
  THAL = varsConfirmedIn2Sample$THAL,
  HIP = varsConfirmedIn2Sample$HIP,
  OLF = varsConfirmedIn2Sample$OLF,
  CTX_only = varsConfirmedIn2Sample$CTX_only,
  CTX_BG_only = varsConfirmedIn2Sample$CTX_BG_only,
  BG_only = varsConfirmedIn2Sample$BG_only,
  THAL_only = varsConfirmedIn2Sample$THAL_only,
  L = varsConfirmedIn2Sample$L,
  R = varsConfirmedIn2Sample$R,
  # Left_only minus Right_only, coloured by LR_colors over -1..1.
  LR_only = varsConfirmedIn2Sample$Left_only - varsConfirmedIn2Sample$Right_only,
  NeuN = varsConfirmedIn2Sample$NeuN,
  IN_only = varsConfirmedIn2Sample$IN_only,
  COUPTF2 = varsConfirmedIn2Sample$COUPTF2,
  COUP_only = varsConfirmedIn2Sample$COUP_only,
  TBR1 = varsConfirmedIn2Sample$TBR1,
  TBR_only = varsConfirmedIn2Sample$TBR_only,
  TBR_BR = varsConfirmedIn2Sample$TBR_BR,
  DLX1 = varsConfirmedIn2Sample$DLX1,
  DLX_BR = varsConfirmedIn2Sample$DLX_BR,
  col = list(
    LR_only = LR_colors,
    COUP_only = colorRamp2(c(0, 1), c("#ffffff", "black")),
    IN_only = colorRamp2(c(0, 1), c("#ffffff", "darkgreen")),
    TBR_only = colorRamp2(c(0, 1), c("#ffffff", "purple")),
    TBR_BR = colorRamp2(c(0, 1), c("#ffffff", "#e58f9d")),
    DLX_BR = colorRamp2(c(0, 1), c("#ffffff", "maroon"))
  )
) #,
#annotation_legend_param = legendDets)

# pdf() takes its page size in inches. The original passed grid
# unit(16, "cm") / unit(8, "cm") objects, whose bare numbers were used as
# inches; plain numbers give the same 16 x 8 inch page.
pdf("220_correlation_7669_Spearman.pdf", width = 16, height = 8)
leftMapUpdate <- Heatmap(
  spearman_corr_matrix,
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1), column_names_gp = gpar(fontsize = 1),
  name = "Correlation",
  left_annotation = left_right_cell_col_ann,
  clustering_method_rows = "complete", clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean", clustering_distance_columns = "euclidean",
  cluster_rows = TRUE, cluster_columns = TRUE,
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
# Explicit draw() so the plot is also written when the code is source()d,
# where top-level auto-printing does not happen.
draw(leftMapUpdate)
dev.off()


In [8]:
#BULK
# Pearson and Spearman correlation heatmaps restricted to Cell_Type "Bulk".
# This cell overwrites onlyMAFofVars, df_encoded, pearson_corr_matrix,
# spearman_corr_matrix and left_right_cell_col_ann from the earlier cells.

bulkVars <- subset(confirmedVars, Cell_Type == "Bulk")
varsConfirmedInBulk2Sample <- subset(varsConfirmedIn2Sample, Bulk == 1)
confirmed2SampleBulkVars <- subset(bulkVars, CHROM.POS.REF.ALT %in% varsConfirmedInBulk2Sample$variant_list)

onlyMAFofVars <- split(sqrt(confirmed2SampleBulkVars$MAF), confirmed2SampleBulkVars$CHROM.POS.REF.ALT) #now using sqrt(MAF) 4/14/23
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars) #converts to matrix format with col variants, row MAF
pearson_corr_matrix <- round(cor(df_encoded, method = "pearson", use = "pairwise.complete.obs"), 4)

# Legend settings per annotation track. Currently unused: the
# annotation_legend_param argument below is commented out.
legend_labels <- c(
  Bulk = "Bulk", ADRENAL = "Adrenal", HEART = "Heart", LIVER = "Liver",
  KIDNEY = "Kidney", SKIN = "Skin", Brain_only = "Brain Only",
  CB = "Cerebellum", CTX = "Cortex", POA = "Preoptic Area",
  BG = "Basal Ganglia", THAL = "Thalamus", HIP = "Hippocampus",
  OLF = "Olfactory Bulb", CTX_only = "CTX Only", CTX_BG_only = "CTX+BG Only",
  BG_only = "BG Only", THAL_only = "Thalamus Only", L = "Left", R = "Right",
  LR_only = "Left Only", # placeholder; replaced by the two-level entry below
  NeuN = "NeuN", IN_only = "Interneuron Only", COUPTF2 = "COUPTF2",
  COUP_only = "COUPTF2 only", TBR1 = "TBR1", TBR_only = "TBR1 only",
  TBR_BR = "TBR1 Brain only", DLX1 = "DLX1", DLX_BR = "DLX1 Brain only"
)
legendDets <- lapply(legend_labels, function(lbl) {
  list(title = "", at = c(1), color_bar = "discrete", labels = unname(lbl))
})
# LR_only is the only track with two levels (-1 = right only, 1 = left only).
legendDets[["LR_only"]] <- list(
  title = "",
  at = c(-1, 1),
  color_bar = "discrete",
  labels = c("Right Only", "Left Only")
)

# NOTE(review): annotation rows follow the order of column X; confirm this
# matches the row order of the correlation matrices built above.
varsConfirmedInBulk2Sample <- varsConfirmedInBulk2Sample[order(varsConfirmedInBulk2Sample$X), ]

left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  Bulk = varsConfirmedInBulk2Sample$Bulk,
  ADRENAL = varsConfirmedInBulk2Sample$ADRENAL,
  HEART = varsConfirmedInBulk2Sample$HEART,
  LIVER = varsConfirmedInBulk2Sample$LIVER,
  KIDNEY = varsConfirmedInBulk2Sample$KIDNEY,
  SKIN = varsConfirmedInBulk2Sample$SKIN,
  Brain_only = varsConfirmedInBulk2Sample$Brain_only,
  CB = varsConfirmedInBulk2Sample$CB,
  CTX = varsConfirmedInBulk2Sample$CTX,
  POA = varsConfirmedInBulk2Sample$POA,
  BG = varsConfirmedInBulk2Sample$BG,
  THAL = varsConfirmedInBulk2Sample$THAL,
  HIP = varsConfirmedInBulk2Sample$HIP,
  OLF = varsConfirmedInBulk2Sample$OLF,
  CTX_only = varsConfirmedInBulk2Sample$CTX_only,
  CTX_BG_only = varsConfirmedInBulk2Sample$CTX_BG_only,
  BG_only = varsConfirmedInBulk2Sample$BG_only,
  THAL_only = varsConfirmedInBulk2Sample$THAL_only,
  L = varsConfirmedInBulk2Sample$L,
  R = varsConfirmedInBulk2Sample$R,
  # Left_only minus Right_only, coloured by LR_colors over -1..1.
  LR_only = varsConfirmedInBulk2Sample$Left_only - varsConfirmedInBulk2Sample$Right_only,
  NeuN = varsConfirmedInBulk2Sample$NeuN,
  IN_only = varsConfirmedInBulk2Sample$IN_only,
  COUPTF2 = varsConfirmedInBulk2Sample$COUPTF2,
  COUP_only = varsConfirmedInBulk2Sample$COUP_only,
  TBR1 = varsConfirmedInBulk2Sample$TBR1,
  TBR_only = varsConfirmedInBulk2Sample$TBR_only,
  TBR_BR = varsConfirmedInBulk2Sample$TBR_BR,
  DLX1 = varsConfirmedInBulk2Sample$DLX1,
  DLX_BR = varsConfirmedInBulk2Sample$DLX_BR,
  col = list(
    LR_only = LR_colors,
    COUP_only = colorRamp2(c(0, 1), c("#ffffff", "black")),
    IN_only = colorRamp2(c(0, 1), c("#ffffff", "darkgreen")),
    TBR_only = colorRamp2(c(0, 1), c("#ffffff", "purple")),
    TBR_BR = colorRamp2(c(0, 1), c("#ffffff", "#e58f9d")),
    DLX_BR = colorRamp2(c(0, 1), c("#ffffff", "maroon"))
  )
) #,
#annotation_legend_param = legendDets)

# pdf() takes its page size in inches. The original passed grid
# unit(16, "cm") / unit(8, "cm") objects, whose bare numbers were used as
# inches; plain numbers give the same 16 x 8 inch page.
pdf("220_correlation_BULK_Pearson_Manhattan_Dist.pdf", width = 16, height = 8)
bulkMapUpdate <- Heatmap(
  pearson_corr_matrix,
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1), column_names_gp = gpar(fontsize = 1),
  name = "Correlation",
  left_annotation = left_right_cell_col_ann,
  clustering_method_rows = "complete", clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan", clustering_distance_columns = "manhattan",
  cluster_rows = TRUE, cluster_columns = TRUE,
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
# Explicit draw() so the plot is also written when the code is source()d,
# where top-level auto-printing does not happen.
draw(bulkMapUpdate)
dev.off()

# Spearman version on the same bulk matrix, reusing the annotation above.
spearman_corr_matrix <- round(cor(df_encoded, method = "spearman", use = "pairwise.complete.obs"), 4)

pdf("220_correlation_BULK_Spearman_Manhattan_Dist.pdf", width = 16, height = 8)
bulkMapUpdate <- Heatmap(
  spearman_corr_matrix,
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1), column_names_gp = gpar(fontsize = 1),
  name = "Correlation",
  left_annotation = left_right_cell_col_ann,
  clustering_method_rows = "complete", clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan", clustering_distance_columns = "manhattan",
  cluster_rows = TRUE, cluster_columns = TRUE,
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
draw(bulkMapUpdate)
dev.off()

In [9]:
# Colour mappings keyed by annotation track name. Each 0/1 track fades from
# white (0) to its own colour (1); LR_ONLY reuses the LR_colors scale.
white_to <- function(shade) {
  colorRamp2(c(0, 1), c("#ffffff", shade))
}

color_list <- list(
  LR_ONLY = LR_colors,
  COUP_ONLY = white_to("black"),
  IN_ONLY = white_to("darkgreen"),
  TBR_ONLY = white_to("purple"),
  TBR_BR = white_to("#e58f9d"),
  DLX_BR = white_to("maroon")
)

In [20]:
# DLX1 ----
# Build the variant-by-variant correlation matrices for DLX1-sorted samples
# and trim the annotation table to the columns that carry information.

# Rounded pairwise-complete correlation of the columns of `m`.
roundedCor <- function(m, method) {
  round(cor(m, method = method, use = "pairwise.complete.obs"), 4)
}

# Main-sort DLX1 rows of the confirmed-variant table.
dlxVars <- subset(confirmedVars, Cell_Type == "DLX1" & Sort_main == 1)

# Annotated variants presented in at least two DLX1 samples (using __DLX1 columns).
varsConfirmedInDLX1_2Sample <- subset(annotated, sample_number_presented_DLX1 >= 2)
dlxVariantIds <- varsConfirmedInDLX1_2Sample$variant_list
confirmed2SampleDLXVars <- subset(dlxVars, CHROM.POS.REF.ALT %in% dlxVariantIds)

# sqrt(MAF) grouped per variant (sqrt transform in use since 4/14/23).
sqrtMafDLX <- sqrt(confirmed2SampleDLXVars$MAF)
onlyMAFofVars <- split(sqrtMafDLX, confirmed2SampleDLXVars$CHROM.POS.REF.ALT)

# One column per variant, one row per observation.
# NOTE(review): presumably every variant has the same number of observations
# so the groups can be bound into a rectangular matrix -- confirm.
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars)
pearson_corr_matrix <- roundedCor(df_encoded, "pearson")
spearman_corr_matrix <- roundedCor(df_encoded, "spearman")

# Keep only columns that contain at least one non-zero entry.
dlxHasNonZero <- colSums(varsConfirmedInDLX1_2Sample != 0) > 0
varsConfirmedInDLX1_2Sample <- varsConfirmedInDLX1_2Sample[, dlxHasNonZero]

# DLX1 Pearson correlation heatmap (Euclidean distance, complete linkage).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_DLX_Pearson.pdf", width = 16, height = 8)

# Show which *_DLX1 annotation columns are present in the table.
names(varsConfirmedInDLX1_2Sample %>% select(ends_with("_DLX1")))

# Order the annotation table by X before extracting the annotation vectors.
# NOTE(review): assumes rows ordered by X line up one-to-one with the rows of
# the correlation matrix -- confirm.
varsConfirmedInDLX1_2Sample <- varsConfirmedInDLX1_2Sample[order(varsConfirmedInDLX1_2Sample$X), ]

# Row annotation: one strip per flag column; colours come from color_list
# where an entry with the same name exists.
left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  #BRAIN_ONLY = varsConfirmedInDLX1_2Sample$Brain_only,
  CTX = varsConfirmedInDLX1_2Sample$CTX_DLX1,
  BG = varsConfirmedInDLX1_2Sample$BG_DLX1,
  THAL = varsConfirmedInDLX1_2Sample$THAL_DLX1,
  PF = varsConfirmedInDLX1_2Sample$PF_DLX1,
  F = varsConfirmedInDLX1_2Sample$F_DLX1,
  P = varsConfirmedInDLX1_2Sample$P_DLX1,
  O = varsConfirmedInDLX1_2Sample$O_DLX1,
  T = varsConfirmedInDLX1_2Sample$T_DLX1,
  INS = varsConfirmedInDLX1_2Sample$Ins_DLX1,
  PUT = varsConfirmedInDLX1_2Sample$Put_DLX1,
  GP = varsConfirmedInDLX1_2Sample$GP_DLX1,
  CTX_ONLY = varsConfirmedInDLX1_2Sample$CTX_only_DLX1,
  CTX_BG_ONLY = varsConfirmedInDLX1_2Sample$CTX_BG_only_DLX1,
  BG_ONLY = varsConfirmedInDLX1_2Sample$BG_only_DLX1,
  # Left_only minus Right_only: 1 = left only, -1 = right only.
  LR_ONLY = varsConfirmedInDLX1_2Sample$Left_only_DLX1 - varsConfirmedInDLX1_2Sample$Right_only_DLX1,
  L_BR_PF_ONLY = varsConfirmedInDLX1_2Sample$L_BR_PF_only_DLX1,
  L_BR_F_ONLY = varsConfirmedInDLX1_2Sample$L_BR_F_only_DLX1,
  L_BR_O_ONLY = varsConfirmedInDLX1_2Sample$L_BR_O_only_DLX1,
  L_BR_INS_ONLY = varsConfirmedInDLX1_2Sample$L_BR_Ins_only_DLX1,
  R_BR_F_ONLY = varsConfirmedInDLX1_2Sample$R_BR_F_only_DLX1,
  R_BR_O_ONLY = varsConfirmedInDLX1_2Sample$R_BR_O_only_DLX1,
  R_BR_INS_ONLY = varsConfirmedInDLX1_2Sample$R_BR_Ins_only_DLX1,
  IN_ONLY = varsConfirmedInDLX1_2Sample$IN_only_DLX1,
  COUPTF2 = varsConfirmedInDLX1_2Sample$COUPTF2_DLX1,
  COUP_BR = varsConfirmedInDLX1_2Sample$COUP_BR_DLX1,
  COUP_ONLY = varsConfirmedInDLX1_2Sample$COUP_only_DLX1,
  col = color_list
  # annotation_legend_param = legendDets  (currently disabled)
)

dlxMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
dlxMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# DLX1 Pearson correlation heatmap clustered on Manhattan distance.
# Fix: the file is named "Pearson_Manhattan_Dist" but the original plotted
# spearman_corr_matrix; the Pearson matrix is used here so content and file
# name agree (as in the CBH section).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
pdf("220_correlation_DLX_Pearson_Manhattan_Dist.pdf", width = 20, height = 8)

# varsConfirmedInDLX1_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
dlxMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan",
  clustering_distance_columns = "manhattan",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
dlxMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# DLX1 Spearman correlation heatmap (Euclidean distance, complete linkage).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
pdf("220_correlation_DLX_Spearman.pdf", width = 16, height = 8)

# varsConfirmedInDLX1_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
dlxMapUpdate <- Heatmap(
  spearman_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
dlxMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

In [21]:
# TBR1 ----
# Build the variant-by-variant correlation matrices for TBR1-sorted samples
# and trim the annotation table to the columns that carry information.

# Rounded pairwise-complete correlation of the columns of `m`.
roundedCor <- function(m, method) {
  round(cor(m, method = method, use = "pairwise.complete.obs"), 4)
}

# Main-sort TBR1 rows of the confirmed-variant table.
tbrVars <- subset(confirmedVars, Cell_Type == "TBR1" & Sort_main == 1)

# Annotated variants presented in at least two TBR1 samples (using __TBR1 columns).
varsConfirmedInTBR1_2Sample <- subset(annotated, sample_number_presented_TBR1 >= 2)
tbrVariantIds <- varsConfirmedInTBR1_2Sample$variant_list
confirmed2SampleTBRVars <- subset(tbrVars, CHROM.POS.REF.ALT %in% tbrVariantIds)

# sqrt(MAF) grouped per variant (sqrt transform in use since 4/14/23).
sqrtMafTBR <- sqrt(confirmed2SampleTBRVars$MAF)
onlyMAFofVars <- split(sqrtMafTBR, confirmed2SampleTBRVars$CHROM.POS.REF.ALT)

# One column per variant, one row per observation.
# NOTE(review): presumably every variant has the same number of observations
# so the groups can be bound into a rectangular matrix -- confirm.
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars)
pearson_corr_matrix <- roundedCor(df_encoded, "pearson")
spearman_corr_matrix <- roundedCor(df_encoded, "spearman")

# Keep only columns that contain at least one non-zero entry.
tbrHasNonZero <- colSums(varsConfirmedInTBR1_2Sample != 0) > 0
varsConfirmedInTBR1_2Sample <- varsConfirmedInTBR1_2Sample[, tbrHasNonZero]

# TBR1 Pearson correlation heatmap (Euclidean distance, complete linkage).

# Show which *_TBR1 annotation columns are present in the table.
names(varsConfirmedInTBR1_2Sample %>% select(ends_with("_TBR1")))

# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_TBR_Pearson.pdf", width = 16, height = 8)

# Order the annotation table by X before extracting the annotation vectors.
# NOTE(review): assumes rows ordered by X line up one-to-one with the rows of
# the correlation matrix -- confirm.
varsConfirmedInTBR1_2Sample <- varsConfirmedInTBR1_2Sample[order(varsConfirmedInTBR1_2Sample$X), ]

# Row annotation: one strip per flag column; colours come from color_list
# where an entry with the same name exists.
left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  BRAIN_ONLY = varsConfirmedInTBR1_2Sample$Brain_only_TBR1,
  CTX = varsConfirmedInTBR1_2Sample$CTX_TBR1,
  PF = varsConfirmedInTBR1_2Sample$PF_TBR1,
  F = varsConfirmedInTBR1_2Sample$F_TBR1,
  P = varsConfirmedInTBR1_2Sample$P_TBR1,
  O = varsConfirmedInTBR1_2Sample$O_TBR1,
  T = varsConfirmedInTBR1_2Sample$T_TBR1,
  INS = varsConfirmedInTBR1_2Sample$Ins_TBR1,
  CTX_ONLY = varsConfirmedInTBR1_2Sample$CTX_only_TBR1,
  CTX_BG_ONLY = varsConfirmedInTBR1_2Sample$CTX_BG_only_TBR1,
  L = varsConfirmedInTBR1_2Sample$L_TBR1,
  R = varsConfirmedInTBR1_2Sample$R_TBR1,
  # Left_only minus Right_only: 1 = left only, -1 = right only.
  LR_ONLY = varsConfirmedInTBR1_2Sample$Left_only_TBR1 - varsConfirmedInTBR1_2Sample$Right_only_TBR1,
  L_BR_PF_ONLY = varsConfirmedInTBR1_2Sample$L_BR_PF_only_TBR1,
  L_BR_F_ONLY = varsConfirmedInTBR1_2Sample$L_BR_F_only_TBR1,
  L_BR_O_ONLY = varsConfirmedInTBR1_2Sample$L_BR_O_only_TBR1,
  R_BR_F_ONLY = varsConfirmedInTBR1_2Sample$R_BR_F_only_TBR1,
  R_BR_O_ONLY = varsConfirmedInTBR1_2Sample$R_BR_O_only_TBR1,
  TBR1 = varsConfirmedInTBR1_2Sample$TBR1_TBR1,
  TBR_ONLY = varsConfirmedInTBR1_2Sample$TBR_only_TBR1,
  TBR_BR = varsConfirmedInTBR1_2Sample$TBR_BR_TBR1,
  col = color_list
  # annotation_legend_param = legendDets  (currently disabled)
)

tbrMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
tbrMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# TBR1 Pearson correlation heatmap clustered on Manhattan distance.
# Fix: the file is named "Pearson_Manhattan_Dist" but the original plotted
# spearman_corr_matrix; the Pearson matrix is used here so content and file
# name agree (as in the CBH section).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
pdf("220_correlation_TBR_Pearson_Manhattan_Dist.pdf", width = 20, height = 8)

# varsConfirmedInTBR1_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
tbrMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan",
  clustering_distance_columns = "manhattan",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
tbrMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# TBR1 Spearman correlation heatmap (Euclidean distance, complete linkage).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
pdf("220_correlation_TBR_Spearman.pdf", width = 16, height = 8)

# varsConfirmedInTBR1_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
tbrMapUpdate <- Heatmap(
  spearman_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
tbrMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

In [22]:
# COUPTF2 ----
# Build the variant-by-variant correlation matrices for COUPTF2-sorted samples
# and trim the annotation table to the columns that carry information.

# Rounded pairwise-complete correlation of the columns of `m`.
roundedCor <- function(m, method) {
  round(cor(m, method = method, use = "pairwise.complete.obs"), 4)
}

# COUPTF2 rows of the confirmed-variant table.
# NOTE(review): unlike the DLX1 and TBR1 sections, no Sort_main == 1 filter
# is applied here -- confirm this is intentional.
coupVars <- subset(confirmedVars, Cell_Type == "COUPTF2")

# Annotated variants presented in at least two COUPTF2 samples (using __COUPTF2 columns).
varsConfirmedInCOUPTF2_2Sample <- subset(annotated, sample_number_presented_COUPTF2 >= 2)
coupVariantIds <- varsConfirmedInCOUPTF2_2Sample$variant_list
confirmed2SampleCOUPVars <- subset(coupVars, CHROM.POS.REF.ALT %in% coupVariantIds)

# sqrt(MAF) grouped per variant (sqrt transform in use since 4/14/23).
sqrtMafCOUP <- sqrt(confirmed2SampleCOUPVars$MAF)
onlyMAFofVars <- split(sqrtMafCOUP, confirmed2SampleCOUPVars$CHROM.POS.REF.ALT)

# One column per variant, one row per observation.
# NOTE(review): presumably every variant has the same number of observations
# so the groups can be bound into a rectangular matrix -- confirm.
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars)
pearson_corr_matrix <- roundedCor(df_encoded, "pearson")
spearman_corr_matrix <- roundedCor(df_encoded, "spearman")

# Keep only columns that contain at least one non-zero entry.
coupHasNonZero <- colSums(varsConfirmedInCOUPTF2_2Sample != 0) > 0
varsConfirmedInCOUPTF2_2Sample <- varsConfirmedInCOUPTF2_2Sample[, coupHasNonZero]

# COUPTF2 Pearson correlation heatmap (Euclidean distance, complete linkage).

# Show which *_COUPTF2 annotation columns are present in the table.
names(varsConfirmedInCOUPTF2_2Sample %>% select(ends_with("_COUPTF2")))

# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_COUP_Pearson.pdf", width = 16, height = 8)

# Order the annotation table by X before extracting the annotation vectors.
# NOTE(review): assumes rows ordered by X line up one-to-one with the rows of
# the correlation matrix -- confirm.
varsConfirmedInCOUPTF2_2Sample <- varsConfirmedInCOUPTF2_2Sample[order(varsConfirmedInCOUPTF2_2Sample$X), ]

# Row annotation: one strip per flag column; colours come from color_list
# where an entry with the same name exists.
left_right_cell_col_ann <- HeatmapAnnotation(
  simple_anno_size = unit(0.2, "cm"),
  which = "row",
  annotation_name_gp = gpar(fontsize = 4),
  BRAIN_ONLY = varsConfirmedInCOUPTF2_2Sample$Brain_only_COUPTF2,
  CTX = varsConfirmedInCOUPTF2_2Sample$CTX_COUPTF2,
  BG = varsConfirmedInCOUPTF2_2Sample$BG_COUPTF2,
  THAL = varsConfirmedInCOUPTF2_2Sample$THAL_COUPTF2,
  PF = varsConfirmedInCOUPTF2_2Sample$PF_COUPTF2,
  F = varsConfirmedInCOUPTF2_2Sample$F_COUPTF2,
  P = varsConfirmedInCOUPTF2_2Sample$P_COUPTF2,
  O = varsConfirmedInCOUPTF2_2Sample$O_COUPTF2,
  T = varsConfirmedInCOUPTF2_2Sample$T_COUPTF2,
  INS = varsConfirmedInCOUPTF2_2Sample$Ins_COUPTF2,
  PUT = varsConfirmedInCOUPTF2_2Sample$Put_COUPTF2,
  GP = varsConfirmedInCOUPTF2_2Sample$GP_COUPTF2,
  CTX_ONLY = varsConfirmedInCOUPTF2_2Sample$CTX_only_COUPTF2,
  CTX_BG_ONLY = varsConfirmedInCOUPTF2_2Sample$CTX_BG_only_COUPTF2,
  BG_ONLY = varsConfirmedInCOUPTF2_2Sample$BG_only_COUPTF2,
  L = varsConfirmedInCOUPTF2_2Sample$L_COUPTF2,
  R = varsConfirmedInCOUPTF2_2Sample$R_COUPTF2,
  # Left_only minus Right_only: 1 = left only, -1 = right only.
  LR_ONLY = varsConfirmedInCOUPTF2_2Sample$Left_only_COUPTF2 - varsConfirmedInCOUPTF2_2Sample$Right_only_COUPTF2,
  L_BR_PF_ONLY = varsConfirmedInCOUPTF2_2Sample$L_BR_PF_only_COUPTF2,
  L_BR_F_ONLY = varsConfirmedInCOUPTF2_2Sample$L_BR_F_only_COUPTF2,
  L_BR_O_ONLY = varsConfirmedInCOUPTF2_2Sample$L_BR_O_only_COUPTF2,
  L_BR_INS_ONLY = varsConfirmedInCOUPTF2_2Sample$L_BR_Ins_only_COUPTF2,
  R_BR_F_ONLY = varsConfirmedInCOUPTF2_2Sample$R_BR_F_only_COUPTF2,
  R_BR_O_ONLY = varsConfirmedInCOUPTF2_2Sample$R_BR_O_only_COUPTF2,
  R_BR_INS_ONLY = varsConfirmedInCOUPTF2_2Sample$R_BR_Ins_only_COUPTF2,
  IN_ONLY = varsConfirmedInCOUPTF2_2Sample$IN_only_COUPTF2,
  COUPTF2 = varsConfirmedInCOUPTF2_2Sample$COUPTF2_COUPTF2,
  COUP_ONLY = varsConfirmedInCOUPTF2_2Sample$COUP_only_COUPTF2,
  COUP_BR = varsConfirmedInCOUPTF2_2Sample$COUP_BR_COUPTF2,
  COUPTF2_ENRICHED = varsConfirmedInCOUPTF2_2Sample$COUPTF2_enriched_COUPTF2,
  col = color_list
  # annotation_legend_param = legendDets  (currently disabled)
)

coupMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
coupMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# COUPTF2 Pearson correlation heatmap clustered on Manhattan distance.
# Fix: the file is named "Pearson_Manhattan_Dist" but the original plotted
# spearman_corr_matrix; the Pearson matrix is used here so content and file
# name agree (as in the CBH section).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
pdf("220_correlation_COUP_Pearson_Manhattan_Dist.pdf", width = 20, height = 8)

# varsConfirmedInCOUPTF2_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
coupMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan",
  clustering_distance_columns = "manhattan",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
coupMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# COUPTF2 Spearman correlation heatmap (Euclidean distance, complete linkage).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(16, "cm") x unit(8, "cm") already produced a 16 x 8 inch page.
pdf("220_correlation_COUP_Spearman.pdf", width = 16, height = 8)

# varsConfirmedInCOUPTF2_2Sample is already ordered by X above and the row
# annotation is reused unchanged, so no re-sort is needed here.
coupMapUpdate <- Heatmap(
  spearman_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
coupMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# CTX TBR1 vs BG DLX1 vs HIP TBR1 Variant Matrix for Determination of Earlier Divergence of HIP from CTX+BG

In [23]:
# CBH ----
# CTX  |  BG  |  HIP |
# TBR1 | DLX1 | TBR1 |
#
# Build the correlation matrices for variants flagged as part of the
# CTX/BG/HIP relationship, then open the PDF for the first CBH heatmap.

cbhVars <- subset(confirmedVars, CTX_BG_HIP_relationship == 1)

# Annotated variants presented in at least two CBH samples (using CBH columns).
varsConfirmedIn2Sample <- subset(annotated, sample_number_presented_CBH >= 2)
# Stricter alternative that also requires the matching cell-type flag:
#varsConfirmedIn2Sample = subset(varsConfirmedIn2Sample, (CTX_CBH == 1 & TBR_only_CBH == 1) | (BG_CBH == 1 & DLX1_CBH == 1) | (HIP_CBH == 1 & TBR_only_CBH == 1) )
# Keep variants flagged in at least one of cortex, basal ganglia, hippocampus.
varsConfirmedIn2Sample <- subset(varsConfirmedIn2Sample, (CTX_CBH == 1) | (BG_CBH == 1) | (HIP_CBH == 1))
confirmedCBH_vars <- subset(cbhVars, CHROM.POS.REF.ALT %in% varsConfirmedIn2Sample$variant_list)

# sqrt(MAF) grouped per variant (sqrt transform in use since 4/14/23).
onlyMAFofVars <- split(sqrt(confirmedCBH_vars$MAF), confirmedCBH_vars$CHROM.POS.REF.ALT)
# One column per variant, one row per observation.
# NOTE(review): presumably every variant has the same number of observations
# so the groups can be bound into a rectangular matrix -- confirm.
df_encoded <- model.matrix(~ . - 1, data = onlyMAFofVars)
pearson_corr_matrix <- round(cor(df_encoded, method = "pearson", use = "pairwise.complete.obs"), 4)
spearman_corr_matrix <- round(cor(df_encoded, method = "spearman", use = "pairwise.complete.obs"), 4)

# Keep only columns that contain at least one non-zero entry.
varsConfirmedIn2Sample <- varsConfirmedIn2Sample[, colSums(varsConfirmedIn2Sample != 0) > 0]

# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_CBH_Pearson.pdf", width = 20, height = 8)

# Legend settings keyed by annotation name, intended for
# HeatmapAnnotation(annotation_legend_param = ...). Every entry has an empty
# title, a discrete colour bar, and the listed break(s) and label(s); all
# entries use the single break 1 except LR_ONLY, which uses -1 and 1.
legendEntry <- function(labels, at = 1) {
  list(title = "", at = at, color_bar = "discrete", labels = labels)
}

legendDets <- list(
  BRAIN_ONLY = legendEntry("Brain Only"),
  CTX = legendEntry("Cortex"),
  BG = legendEntry("Basal Ganglia"),
  THAL = legendEntry("Thalamus"),
  PF = legendEntry("PF"),
  F = legendEntry("F"),
  P = legendEntry("P"),
  O = legendEntry("O"),
  T = legendEntry("T"),
  INS = legendEntry("INS"),
  PUT = legendEntry("PUT"),
  GP = legendEntry("GP"),
  CTX_ONLY = legendEntry("CTX Only"),
  CTX_BG_ONLY = legendEntry("CTX+BG Only"),
  BG_ONLY = legendEntry("BG Only"),
  L = legendEntry("Left"),
  R = legendEntry("Right"),
  LR_ONLY = legendEntry(c("Right Only", "Left Only"), at = c(-1, 1)),
  DARPP32M.NeuNP = legendEntry("DARPP32M.NeuNP"),
  DARPP32P.NeuNP = legendEntry("DARPP32P.NeuNP"),
  LHX2 = legendEntry("LHX2"),
  OLIG2 = legendEntry("OLIG2"),
  PU1 = legendEntry("PU1"),
  NeuN = legendEntry("NeuN"),
  IN_ONLY = legendEntry("Interneuron Only"),
  COUPTF2 = legendEntry("COUPTF2"),
  COUPTF2_ENRICHED = legendEntry("COUPTF2 Enriched"),
  COUP_ONLY = legendEntry("COUPTF2 only"),
  COUP_BR = legendEntry("COUPTF2 Brain only"),
  TBR1 = legendEntry("TBR1"),
  TBR_BR = legendEntry("TBR1 Brain only"),
  DLX1 = legendEntry("DLX1"),
  DLX_BR = legendEntry("DLX1 Brain only")
)

# CBH Pearson correlation heatmap (Euclidean distance, complete linkage),
# drawn on the graphics device opened earlier in this cell and then closed.

# Order the annotation table by X and list the *_CBH columns it contains.
cbhAnnoTable <- varsConfirmedIn2Sample[order(varsConfirmedIn2Sample$X), ]
varsConfirmedIn2Sample <- cbhAnnoTable
names(select(varsConfirmedIn2Sample, ends_with("_CBH")))

# Row annotation restricted to cell-type and region flags; the commented
# entries are further columns that can be switched back on.
left_right_cell_col_ann <- HeatmapAnnotation(
  which = "row",
  simple_anno_size = unit(0.5, "cm"), #previously 0.2
  annotation_name_gp = gpar(fontsize = 6), #previously 4 fontsize
  #BRAIN_ONLY = varsConfirmedIn2Sample$Brain_only_CBH,
  #INS = varsConfirmedIn2Sample$Ins_CBH,
  #CAU = varsConfirmedIn2Sample$Cau_CBH,
  #PUT = varsConfirmedIn2Sample$Put_CBH,
  #GP = varsConfirmedIn2Sample$GP_CBH,
  #GP_ONLY = varsConfirmedIn2Sample$GP_only_CBH,
  #PF = varsConfirmedIn2Sample$PF_CBH,
  #F = varsConfirmedIn2Sample$F_CBH,
  #P = varsConfirmedIn2Sample$P_CBH,
  #O = varsConfirmedIn2Sample$O_CBH,
  #T = varsConfirmedIn2Sample$T_CBH,
  #CA1 = varsConfirmedIn2Sample$CA1_CBH,
  #CA3 = varsConfirmedIn2Sample$CA3_CBH,
  #DG = varsConfirmedIn2Sample$DG_CBH,
  IN_ONLY = cbhAnnoTable$IN_only_CBH,
  #DLX1 = varsConfirmedIn2Sample$DLX1_CBH,
  DLX_BR = cbhAnnoTable$DLX_BR_CBH,
  #TBR1 = varsConfirmedIn2Sample$TBR1_CBH,
  #TBR_ONLY = varsConfirmedIn2Sample$TBR_only_CBH,
  TBR_BR = cbhAnnoTable$TBR_BR_CBH,
  CTX = cbhAnnoTable$CTX_CBH,
  BG = cbhAnnoTable$BG_CBH,
  HIP = cbhAnnoTable$HIP_CBH,
  CTX_ONLY = cbhAnnoTable$CTX_only_CBH,
  BG_ONLY = cbhAnnoTable$BG_only_CBH,
  HIP_ONLY = cbhAnnoTable$HIP_only_CBH,
  CTX_BG = cbhAnnoTable$CTX_BG_shared_CBH,
  BG_HIP = cbhAnnoTable$BG_HIP_shared_CBH,
  CTX_HIP = cbhAnnoTable$CTX_HIP_shared_CBH,
  col = color_list
  #annotation_legend_param = legendDets
)

cbhCorrColours <- colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3"))
cbhMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = cbhCorrColours,
  left_annotation = left_right_cell_col_ann,
  row_names_gp = gpar(fontsize = "1"),
  column_names_gp = gpar(fontsize = "1"),
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
cbhMapUpdate
dev.off()

# CBH Pearson correlation heatmap clustered on Manhattan distance.
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_CBH_Pearson_Manhattan_Dist.pdf", width = 20, height = 8)

# varsConfirmedIn2Sample is already ordered by X above and the row annotation
# is reused unchanged, so no re-sort is needed here.
cbhMapUpdate <- Heatmap(
  pearson_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "manhattan",
  clustering_distance_columns = "manhattan",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
cbhMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()

# CBH Spearman correlation heatmap (Euclidean distance, complete linkage).
# pdf() measures width/height in inches and silently drops grid units, so the
# original unit(20, "cm") x unit(8, "cm") already produced a 20 x 8 inch page.
# Plain numbers keep that page size and state the real unit.
pdf("220_correlation_CBH_Spearman.pdf", width = 20, height = 8)

# varsConfirmedIn2Sample is already ordered by X above and the row annotation
# is reused unchanged, so no re-sort is needed here.
cbhMapUpdate <- Heatmap(
  spearman_corr_matrix,
  name = "Correlation",
  col = colorRamp2(c(-1, 0, 1), c("#0d0dad", "white", "firebrick3")),
  row_names_gp = gpar(fontsize = 1),
  column_names_gp = gpar(fontsize = 1),
  left_annotation = left_right_cell_col_ann,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  width = unit(16, "cm"),
  heatmap_height = unit(18, "cm")
)
cbhMapUpdate # auto-printing the Heatmap object draws it on the open device
dev.off()
