In [3]:
library(ggplot2)
library(dplyr)
library(gplots)
library(reshape2)
library(umap)
library(ggExtra) 
library(pals)
library(RColorBrewer)

# Load the tab-separated summary table; the first line of the file supplies the
# column names. `TRUE` is spelled out because `T` is an ordinary, reassignable
# variable (and this notebook later creates data-frame columns named `T`/`F`).
filtered <- read.table(
    "../20230101_QC/final_summary_anno3.txt",
    header = TRUE,
    sep = "\t"
)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘gplots’


The following object is masked from ‘package:stats’:

    lowess




# COUPTF2

In [4]:

# ---- Call counts, sample selection and validated mosaic variants (MVs) ----

# Size of the input table: total rows and distinct CHROM.POS.REF.ALT keys.
print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))
colnames(filtered)

# Sample IDs kept for this analysis.
selected_ids <- c(
    '7669_R_Ant_Cau_DLX1','7669_R_sec3_Cau_DLX1',
    '7669_R_Ant_Put_DLX1','7669_R_sec3_Put_DLX1',
    #'7669_L_sec1_Cau_DLX1','7669_L_sec1_Put_DLX1',
    '7669_R_O_1_COUPTF2','7669_R_P_1_COUPTF2',
    '7669_R_T_1_COUPTF2',
    '7669_R_I_1_COUPTF2','7669_R_F_1_COUPTF2',
    '7669_R_O_1_TBR1','7669_R_P_1_TBR1','7669_R_T_1_TBR1','7669_R_I_1_TBR1','7669_R_F_1_TBR1',
    '7669_R_O_1_DLX1','7669_R_P_1_DLX1','7669_R_T_1_DLX1','7669_R_I_1_DLX1','7669_R_F_1_DLX1'
)
filtered2 <- subset(filtered, ID %in% selected_ids)
filtered2$Punch <- as.character(filtered2$Punch)
filtered2$Note <- as.character(filtered2$Note)

# Confidence-interval cutoffs, defined once so the two filters below cannot
# drift apart. The same lower bound is applied to LOWER_CI (must exceed it)
# and to NORMAL_LOWER_CI (must stay below it).
ci_lower_cutoff <- 0.002360687
ci_upper_cutoff <- 0.4562841

# NOTE(review): bracket indexing with `== 1` returns all-NA rows wherever
# Mosaic_shared_btw_WGS_Amp is NA -- confirm the column has no missing values.
mosaic_filtered <- filtered2[filtered2$Mosaic_shared_btw_WGS_Amp==1,]
mosaic_filtered_cutoff <- subset(
    mosaic_filtered,
    LOWER_CI > ci_lower_cutoff & UPPER_CI < ci_upper_cutoff & NORMAL_LOWER_CI < ci_lower_cutoff
)

# The first count is over variants before the CI cutoffs; the second is over
# individual calls after the cutoffs.
print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))

# For every variant, count the rows of filtered2 that pass the CI cutoffs.
# One match()/tabulate() pass replaces a per-variant rescan of filtered2 and
# also behaves correctly when variant_list has zero rows (where `1:nrow()`
# would iterate over c(1, 0)).
passing_calls <- subset(
    filtered2,
    LOWER_CI > ci_lower_cutoff & UPPER_CI < ci_upper_cutoff & NORMAL_LOWER_CI < ci_lower_cutoff
)
variant_list$sample_number_presented <- tabulate(
    match(passing_calls$CHROM.POS.REF.ALT, variant_list$variant_list),
    nbins = nrow(variant_list)
)

print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", sum(variant_list$sample_number_presented > 0)))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ", sum(variant_list$sample_number_presented > 1)))

[1] "A total of variant calls:  353682"
[1] "The number of variants included in ampliseq:  2406"


[1] "The number of validated MVs:  780"
[1] "The number of validated MV calls:  1453"
[1] "The number of validated MVs (in the variant_list variable):  780"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  135"
[1] "The number of shared validated MVs :  123"


In [5]:
# Add one 0/1 column per category to variant_list: 1 when at least one row of
# mosaic_filtered_cutoff for that variant carries the category in the given
# annotation column, 0 otherwise. Columns are created in the order listed.
#
# NOTE(review): 'POA' is listed under both Organ and Region, so the Region pass
# overwrites the POA column written by the Organ pass (same as the original
# loops) -- confirm this is intended.
presence_categories <- list(
    Organ      = c('HEART','ADRENAL','CB','CTX','LIVER','KIDNEY','POA','BG','THAL','SKIN','OLF','HIP'),
    Hemisphere = c('L','R'),
    Region     = c('I','F','P','O','T','Cau','Put','Thal','GP','Hip','POA','EC','Olf','CC','mO','Cb'),
    Cell_Type  = c('DLX1','TBR1','COUPTF2','Bulk','NeuN')
)

called_keys <- mosaic_filtered_cutoff$CHROM.POS.REF.ALT
for (field in names(presence_categories)) {
    field_values <- mosaic_filtered_cutoff[[field]]
    for (category in presence_categories[[field]]) {
        # Variant keys having at least one call annotated with this category.
        # Missing keys are dropped so they can never match a variant.
        keys_with_category <- called_keys[!is.na(called_keys) & field_values %in% category]
        # Vectorised membership test: one pass per category instead of one
        # table scan per variant, and safe when variant_list has zero rows.
        variant_list[[category]] <- as.numeric(variant_list$variant_list %in% keys_with_category)
    }
}

In [6]:
names(variant_list)
#'HEART','ADRENAL','LIVER','KIDNEY','SKIN','POA','BG','THAL','HIP','OLF''CB','CTX',

# Every flag below is a 0/1 column: 1 for variants whose row satisfies the
# rule, 0 for all others. Membership is resolved through row names, which were
# set to the variant key when variant_list was built.

# Absent from every non-brain organ column and present in at least one brain column.
Brain_only_variants <- subset(
    variant_list,
    (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0) &
        (POA==1 | BG==1 | THAL==1 | HIP==1 | OLF==1 | CB==1 | CTX==1)
)
variant_list$Brain_only <- as.numeric(variant_list$variant_list %in% rownames(Brain_only_variants))

# Present in CTX and absent from every other organ column.
CTX_only_variants <- subset(
    variant_list,
    (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & THAL==0 & HIP==0 & OLF==0 & CB==0) &
        CTX==1
)
variant_list$CTX_only <- as.numeric(variant_list$variant_list %in% rownames(CTX_only_variants))

# Remaining flags, applied in order. Later rules read Brain_only, which is
# already in place at this point.
exclusivity_rules <- list(
    BG_only = quote(
        (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0 & CTX==0) & (BG==1)
    ),
    THAL_only = quote(
        (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & HIP==0 & OLF==0 & CB==0 & CTX==0) & (THAL==1)
    ),
    CTX_BG_only = quote(
        (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0) & (CTX==1 | BG==1)
    ),
    CTX_BG_shared = quote(
        (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0) & (CTX==1 & BG==1)
    ),
    CTX_THAL_shared = quote(
        (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & HIP==0 & OLF==0 & CB==0) & (CTX==1 & THAL==1)
    ),
    # Disabled rules, kept for reference:
    # BG_HIP_only = quote(
    #     (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & CTX==0 & OLF==0 & CB==0) & (HIP==1 | BG==1)
    # ),
    # CTX_HIP_only = quote(
    #     (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & BG==0 & OLF==0 & CB==0) & (CTX==1 | HIP==1)
    # ),
    Left_only  = quote((L==1) & (R==0) & (Brain_only==1)),
    Right_only = quote((R==1) & (L==0) & (Brain_only==1)),
    IN_only    = quote((TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1)),
    COUP_only  = quote((TBR1==0) & (COUPTF2==1) & (Brain_only==1)),
    TBR_only   = quote((DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1)),
    TBR_BR     = quote((TBR1==1) & (Brain_only==1)),
    DLX_BR     = quote((DLX1==1) & (Brain_only==1))
)

for (flag_name in names(exclusivity_rules)) {
    # Evaluate the rule against the current columns; NA results count as "no",
    # matching subset().
    rule_hits <- eval(exclusivity_rules[[flag_name]], variant_list)
    only_variants <- variant_list[rule_hits & !is.na(rule_hits), , drop = FALSE]
    variant_list[[flag_name]] <- as.numeric(variant_list$variant_list %in% rownames(only_variants))
}

In [7]:
#---------------------------BG-DLX1 vs CTX-TBR1 vs CTX-COUPTFII-------------------
# 0/1 flags for the three cell-type columns and for each pairwise overlap that
# excludes the third. Rules are applied in order because the overlap rules
# read the three flags created first.
# NOTE(review): the first three rules test only the cell-type column (TBR1,
# DLX1, COUPTF2) and apply no BG/CTX region condition, despite the _BG/_CTX
# suffixes in the column names -- confirm this is intended.
celltype_rules <- list(
    TBR_BR_CTX             = quote((TBR1==1)),
    DLX_BR_BG              = quote((DLX1==1)),
    COUP_BR_CTX            = quote((COUPTF2==1)),
    TBR_BR_CTXnDLX_BR_BG   = quote((TBR_BR_CTX==1) & (DLX_BR_BG==1) & (COUP_BR_CTX==0)),
    TBR_BR_CTXnCOUP_BR_CTX = quote((TBR_BR_CTX==1) & (DLX_BR_BG==0) & (COUP_BR_CTX==1)),
    DLX_BR_BGnCOUP_BR_CTX  = quote((TBR_BR_CTX==0) & (DLX_BR_BG==1) & (COUP_BR_CTX==1))
)

for (flag_name in names(celltype_rules)) {
    # NA rule results are treated as "not selected", as subset() does.
    rule_hits <- eval(celltype_rules[[flag_name]], variant_list)
    only_variants <- variant_list[rule_hits & !is.na(rule_hits), , drop = FALSE]
    variant_list[[flag_name]] <- as.numeric(variant_list$variant_list %in% rownames(only_variants))
}

names(variant_list)

In [8]:
# Report, for each flag column, how many variants have the flag set to 1.
reported_flags <- c(
    "TBR_BR_CTX", "DLX_BR_BG", "COUP_BR_CTX",
    "TBR_BR_CTXnDLX_BR_BG", "TBR_BR_CTXnCOUP_BR_CTX", "DLX_BR_BGnCOUP_BR_CTX"
)
for (flag_name in reported_flags) {
    flagged_rows <- variant_list[variant_list[[flag_name]]==1,]
    print(paste(paste0(flag_name, " variants: "), nrow(flagged_rows)))
}

[1] "TBR_BR_CTX variants:  116"
[1] "DLX_BR_BG variants:  130"
[1] "COUP_BR_CTX variants:  101"
[1] "TBR_BR_CTXnDLX_BR_BG variants:  21"
[1] "TBR_BR_CTXnCOUP_BR_CTX variants:  1"
[1] "DLX_BR_BGnCOUP_BR_CTX variants:  2"


In [9]:
# Display the distinct sample IDs present in filtered2, in sorted order.
sort(unique(filtered2$ID))

In [10]:
# Export the annotated variant table as CSV.
# NOTE(review): the file name hard-codes "780", while the number of rows in
# variant_list depends on the input data -- confirm they still agree.
write.csv(
    x = variant_list,
    file = "./780_variant_annotation_COUPTF2_origin2.csv"
)