In [1]:
library(ggplot2)
library(dplyr)
library(gplots)
library(reshape2)
library(umap)
library(ggExtra) 
library(pals)
library(RColorBrewer)

# Load the per-call summary table: tab-separated text with a header row.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
filtered <- read.table("../7614_final_summary_anno3.txt", header = TRUE, sep = "\t")

# Column inventory of the loaded table.
names(filtered)

# Distinct values of each annotation column, displayed as a quick sanity
# check of the categories used by the later cells.
unique(filtered$Hemisphere)
unique(filtered$Region)
unique(filtered$Punch)
unique(filtered$Cell_Type)
unique(filtered$Location)
unique(filtered$AntPost)
unique(filtered$DorVen)
unique(filtered$Organ)
unique(filtered$Note)

# Number of rows in the table, and number of distinct CHROM.POS.REF.ALT keys.
print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))

# Sample IDs kept for this analysis. Annotation of variants is restricted to
# these selected sorted samples only.
sorted_sample_ids <- c(
  '7614-L-2-Cau-DLX1', '7614-L-3-Cau-DLX1', '7614-L-4-Put-DLX1', '7614-L-5-Cau-DLX1',
  '7614-L-5-Put-DLX1', '7614-L-7-Cau-DLX1', '7614-L-7-Put-DLX1',
  '7614-L-PF-TBR1', '7614-L-F-TBR1', '7614-L-9-Ins-TBR1', '7614-L-T-TBR1', '7614-L-O-TBR1',
  '7614-L-PF-COUPTF2', '7614-L-F-COUPTF2', '7614-L-9-Ins-COUPTF2', '7614-L-T-COUPTF2', '7614-L-O-COUPTF2',
  '7614-L-PF-DLX1', '7614-L-F-34-DLX1', '7614-L-9-Ins-DLX1', '7614-L-T-DLX1', '7614-L-O-DLX1'
)
filtered2 <- subset(filtered, ID %in% sorted_sample_ids)
filtered2$Punch <- as.character(filtered2$Punch)
filtered2$Note <- as.character(filtered2$Note)

# Keep the rows flagged in Mosaic_shared_btw_WGS_Amp. `which()` drops rows
# whose flag is NA; plain logical indexing would instead insert an all-NA row
# for each of them, which would then show up as a spurious NA variant.
mosaic_filtered <- filtered2[which(filtered2$Mosaic_shared_btw_WGS_Amp == 1), ]

# Confidence-interval cut-offs applied to every call.
# NOTE(review): presumably allele-fraction bounds; confirm their origin.
lower_ci_floor <- 0.001677998
upper_ci_ceiling <- 0.3923302

# A call passes when its lower CI bound is above the floor, its upper CI bound
# is below the ceiling, and the NORMAL_LOWER_CI value is below the same floor.
# Rows where the test evaluates to NA are dropped.
passes_cutoff <- mosaic_filtered$LOWER_CI > lower_ci_floor &
  mosaic_filtered$UPPER_CI < upper_ci_ceiling &
  mosaic_filtered$NORMAL_LOWER_CI < lower_ci_floor
mosaic_filtered_cutoff <- mosaic_filtered[which(passes_cutoff), ]

# The first count is taken before the CI cut-off (distinct variants), the
# second after it (individual calls).
print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

# One row per distinct variant in mosaic_filtered. The vector must be named
# `variant_list` when it is converted, because as.data.frame() takes the
# column name from the variable name and later cells read
# variant_list$variant_list.
variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))

# Count, for each variant, the rows of filtered2 that pass the CI cut-offs.
# This is done in one vectorised pass instead of re-filtering the whole table
# once per variant, and it also works when variant_list has no rows.
lower_ci_floor <- 0.001677998
upper_ci_ceiling <- 0.3923302
passing_rows <- which(
  filtered2$LOWER_CI > lower_ci_floor &
    filtered2$UPPER_CI < upper_ci_ceiling &
    filtered2$NORMAL_LOWER_CI < lower_ci_floor
)
passing_ids <- as.character(filtered2$CHROM.POS.REF.ALT[passing_rows])
# match() maps each passing call to its row in variant_list (NA when the
# variant is not listed); tabulate() ignores those NAs and returns one integer
# count per listed variant.
variant_list$sample_number_presented <- tabulate(
  match(passing_ids, as.character(variant_list$variant_list)),
  nbins = nrow(variant_list)
)

print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", sum(variant_list$sample_number_presented > 0)))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ", sum(variant_list$sample_number_presented > 1)))


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘gplots’


The following object is masked from ‘package:stats’:

    lowess




[1] "A total of variant calls:  335124"
[1] "The number of variants included in ampliseq:  1044"
[1] "The number of validated MVs:  287"
[1] "The number of validated MV calls:  1409"
[1] "The number of validated MVs (in the variant_list variable):  287"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  124"
[1] "The number of shared validated MVs :  114"


In [2]:
# Add one 0/1 column per category to variant_list. A variant gets 1 when at
# least one of its calls in mosaic_filtered_cutoff carries that category in
# the given annotation column, and 0 otherwise.
#
# Each list entry is named after the annotation column it reads. The order of
# the entries and of the categories fixes the order in which the new columns
# are appended.
presence_categories <- list(
  Organ = c('CTX', 'CB', 'Heart', 'Kidney', 'BG', 'THAL', 'Liver'),
  Hemisphere = c('L', 'R'),
  Region = c('PF', 'F', 'P', 'O', 'T', 'Ins', 'Cau', 'Put', 'GP'),
  Cell_Type = c('DLX1', 'TBR1', 'COUPTF2', 'Bulk')
)

# Compare keys as character so factor level sets cannot interfere.
variant_ids <- as.character(variant_list$variant_list)
call_ids <- as.character(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)

for (annotation in names(presence_categories)) {
  call_labels <- as.character(mosaic_filtered_cutoff[[annotation]])
  for (category in presence_categories[[annotation]]) {
    # IDs of all calls annotated with this category; %in% never yields NA.
    ids_seen <- call_ids[call_labels %in% category]
    variant_list[[category]] <- as.numeric(variant_ids %in% ids_seen)
  }
}

In [3]:
# Derived 0/1 flags built from the presence columns of variant_list.
# Every stanza selects the qualifying rows with subset() and then marks them.
# Inside subset() the names F, P, O, L, R ... refer to columns of
# variant_list (so `F` is the column, not FALSE).

# 1 for rows of `table` whose key appears among the row names of `members`,
# 0 otherwise. Row names of variant_list are the CHROM.POS.REF.ALT keys.
member_flag <- function(table, members) {
  as.numeric(table$variant_list %in% rownames(members))
}

# Column `column` of `table`, or all zeros when the table has no such column.
optional_flag <- function(table, column) {
  if (column %in% names(table)) table[[column]] else rep(0, nrow(table))
}

#--------------------------------tissue only----------------------------------------
# Present in at least one brain tissue and in none of liver, heart, kidney.
Brain_only_variants <- subset(variant_list, (Liver==0 & Heart==0 & Kidney==0) & (CTX==1 | CB==1 | BG==1 | THAL==1))
variant_list$Brain_only <- member_flag(variant_list, Brain_only_variants)

CTX_only_variants <- subset(variant_list, (Liver==0 & Heart==0 & Kidney==0 & CB==0 & BG==0 & THAL==0) & (CTX==1))
variant_list$CTX_only <- member_flag(variant_list, CTX_only_variants)

only_variants <- subset(variant_list, (Liver==0 & Heart==0 & Kidney==0 & CB==0 & THAL==0) & (CTX==1 | BG==1))
variant_list$CTX_BG_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (Liver==0 & Heart==0 & Kidney==0 & CB==0 & THAL==0 & CTX==0) & (BG==1))
variant_list$BG_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (Liver==0 & Heart==0 & Kidney==0 & CB==0 & BG==0 & CTX==0) & (THAL==1))
variant_list$THAL_only <- member_flag(variant_list, only_variants)

#--------------------------------hemisphere only----------------------------------------
only_variants <- subset(variant_list, (L==1) & (R==0) & (Brain_only==1))
variant_list$Left_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (R==1) & (L==0) & (Brain_only==1))
variant_list$Right_only <- member_flag(variant_list, only_variants)

#--------------------------------lobe only----------------------------------------
# Exactly one of PF, F, P, O, Ins is set, within a single hemisphere.
# NOTE(review): none of these tests looks at the `T` region column that the
# variant table also carries, so a variant seen in T can still be marked as
# "only" for another lobe, and there is no *_T_only flag. Confirm whether
# this is intended.
only_variants <- subset(variant_list, (PF==1) & (F==0) & (P==0) & (O==0) & (Ins==0) & (Left_only==1) & (Brain_only==1))
variant_list$L_BR_PF_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==1) & (P==0) & (O==0) & (Ins==0) & (Left_only==1) & (Brain_only==1))
variant_list$L_BR_F_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==1) & (O==0) & (Ins==0) & (Left_only==1) & (Brain_only==1))
variant_list$L_BR_P_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==0) & (O==1) & (Ins==0) & (Left_only==1) & (Brain_only==1))
variant_list$L_BR_O_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==0) & (O==0) & (Ins==1) & (Left_only==1) & (Brain_only==1))
variant_list$L_BR_Ins_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==1) & (F==0) & (P==0) & (O==0) & (Ins==0) & (Right_only==1) & (Brain_only==1))
variant_list$R_BR_PF_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==1) & (P==0) & (O==0) & (Ins==0) & (Right_only==1) & (Brain_only==1))
variant_list$R_BR_F_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==1) & (O==0) & (Ins==0) & (Right_only==1) & (Brain_only==1))
variant_list$R_BR_P_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==0) & (O==1) & (Ins==0) & (Right_only==1) & (Brain_only==1))
variant_list$R_BR_O_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (PF==0) & (F==0) & (P==0) & (O==0) & (Ins==1) & (Right_only==1) & (Brain_only==1))
variant_list$R_BR_Ins_only <- member_flag(variant_list, only_variants)

#--------------------------------cell type----------------------------------------
# Brain-only variants seen in DLX1 or COUPTF2 but not in TBR1.
only_variants <- subset(variant_list, (TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1))
variant_list$IN_only <- member_flag(variant_list, only_variants)

# Brain-only variants seen in COUPTF2 but not in TBR1 (DLX1 is not tested).
only_variants <- subset(variant_list, (TBR1==0) & (COUPTF2==1) & (Brain_only==1))
variant_list$COUP_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1))
variant_list$TBR_only <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (TBR1==1) & (Brain_only==1))
variant_list$TBR_BR <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (DLX1==1) & (Brain_only==1))
variant_list$DLX_BR <- member_flag(variant_list, only_variants)

only_variants <- subset(variant_list, (COUPTF2==1) & (Brain_only==1))
variant_list$COUP_BR <- member_flag(variant_list, only_variants)

# The original tests were written as 'DARPP32P-NeuNP'==1, which compares the
# string itself with 1 and is therefore always FALSE. The intended column is
# looked up by name instead. If variant_list has no such column the flag is 0
# for every variant, which is what the always-FALSE test produced.
msn_present <- optional_flag(variant_list, "DARPP32P-NeuNP")
only_variants <- subset(variant_list, (msn_present == 1) & (Brain_only==1))
variant_list$MSN_BR <- member_flag(variant_list, only_variants)

non_msn_present <- optional_flag(variant_list, "DARPP32M-NeuNP")
only_variants <- subset(variant_list, (non_msn_present == 1) & (Brain_only==1))
variant_list$nonMSN_BR <- member_flag(variant_list, only_variants)



In [4]:
#---------------------------BG-DLX1 vs CTX-TBR1 vs CTX-COUPTFII-------------------
# Base flags: each one copies the matching cell-type presence column.
# NOTE(review): the tests use only TBR1 / DLX1 / COUPTF2; no tissue column is
# checked here despite the CTX / BG suffixes in the flag names.
only_variants <- variant_list[which(variant_list$TBR1 == 1), , drop = FALSE]
variant_list$TBR_BR_CTX <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

only_variants <- variant_list[which(variant_list$DLX1 == 1), , drop = FALSE]
variant_list$DLX_BR_BG <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

only_variants <- variant_list[which(variant_list$COUPTF2 == 1), , drop = FALSE]
variant_list$COUP_BR_CTX <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

# Pairwise flags: set in exactly two of the three base flags.
tbr_hit <- variant_list$TBR_BR_CTX == 1
dlx_hit <- variant_list$DLX_BR_BG == 1
coup_hit <- variant_list$COUP_BR_CTX == 1

only_variants <- variant_list[which(tbr_hit & dlx_hit & !coup_hit), , drop = FALSE]
variant_list$TBR_BR_CTXnDLX_BR_BG <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

only_variants <- variant_list[which(tbr_hit & coup_hit & !dlx_hit), , drop = FALSE]
variant_list$TBR_BR_CTXnCOUP_BR_CTX <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

only_variants <- variant_list[which(dlx_hit & coup_hit & !tbr_hit), , drop = FALSE]
variant_list$DLX_BR_BGnCOUP_BR_CTX <- as.numeric(variant_list$variant_list %in% rownames(only_variants))

# Show the full column list after all flags have been added.
names(variant_list)

In [5]:
# Number of variant_list rows in which each flag equals 1, computed once and
# then reported one flag per line.
flag_counts <- vapply(
  c(
    "TBR_BR_CTX", "DLX_BR_BG", "COUP_BR_CTX",
    "TBR_BR_CTXnDLX_BR_BG", "TBR_BR_CTXnCOUP_BR_CTX", "DLX_BR_BGnCOUP_BR_CTX"
  ),
  function(flag_name) nrow(variant_list[variant_list[[flag_name]] == 1, ]),
  integer(1)
)

print(paste("TBR_BR_CTX variants: ", flag_counts[["TBR_BR_CTX"]]))
print(paste("DLX_BR_BG variants: ", flag_counts[["DLX_BR_BG"]]))
print(paste("COUP_BR_CTX variants: ", flag_counts[["COUP_BR_CTX"]]))
print(paste("TBR_BR_CTXnDLX_BR_BG variants: ", flag_counts[["TBR_BR_CTXnDLX_BR_BG"]]))
print(paste("TBR_BR_CTXnCOUP_BR_CTX variants: ", flag_counts[["TBR_BR_CTXnCOUP_BR_CTX"]]))
print(paste("DLX_BR_BGnCOUP_BR_CTX variants: ", flag_counts[["DLX_BR_BGnCOUP_BR_CTX"]]))

[1] "TBR_BR_CTX variants:  106"
[1] "DLX_BR_BG variants:  117"
[1] "COUP_BR_CTX variants:  90"
[1] "TBR_BR_CTXnDLX_BR_BG variants:  20"
[1] "TBR_BR_CTXnCOUP_BR_CTX variants:  1"
[1] "DLX_BR_BGnCOUP_BR_CTX variants:  8"


In [6]:
# Sorted list of the distinct sample IDs remaining in filtered2.
sort(unique(filtered2[["ID"]]))

In [7]:
# Drop the key column; the same keys are kept as row names. The column is
# selected by name rather than by position, and drop = FALSE keeps the result
# a data frame even if only one column remains.
variant_list2 <- variant_list[, names(variant_list) != "variant_list", drop = FALSE]

In [8]:
# Export the annotation table; row names (the variant keys) are written as the
# first CSV column by default.
write.csv(variant_list2, file = "./287_variant_annotation_COUPTF2_origin2.csv")