In [1]:
library(ggplot2)
library(dplyr)
library(gplots)
library(reshape2)
library(umap)
library(ggExtra) 
library(pals)
library(RColorBrewer)

filtered <- read.table("../20230101_QC/final_summary_anno3.txt",header=T,sep="\t")


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘gplots’


The following object is masked from ‘package:stats’:

    lowess




# COUPTF2

In [2]:

print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))

filtered2 <- subset(filtered, Sort_main %in% c('1')) %>% subset(Cell_Type %in% c('COUPTF2'))
filtered2$Punch<-as.character(filtered2$Punch)
filtered2$Note<-as.character(filtered2$Note)

mosaic_filtered <- filtered2[filtered2$Mosaic_shared_btw_WGS_Amp==1,]
mosaic_filtered_cutoff <- subset(mosaic_filtered, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687)

print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))
for (i in 1:nrow(variant_list)){
    xx <- filtered2[filtered2$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
    xx <- subset(xx, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687 )
    n_row <- nrow(xx)
    variant_list$sample_number_presented[i] <- n_row
    }
print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", nrow(variant_list[variant_list$sample_number_presented>0,])))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ",nrow(variant_list[variant_list$sample_number_presented>1,])))

[1] "A total of variant calls:  353682"
[1] "The number of variants included in ampliseq:  2406"
[1] "The number of validated MVs:  780"
[1] "The number of validated MV calls:  1558"
[1] "The number of validated MVs (in the variant_list variable):  780"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  149"
[1] "The number of shared validated MVs :  128"


In [3]:
for (j in c('HEART','ADRENAL','CB','CTX','LIVER','KIDNEY','POA','BG','THAL','SKIN','OLF','HIP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Organ)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('L','R')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Hemisphere)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('I','F','P','O','T','Cau','Put','Thal','GP','Hip','POA','EC','Olf','CC','mO','Cb')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Region)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('DLX1','TBR1','COUPTF2','Bulk','NeuN')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Cell_Type)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

In [4]:
names(variant_list)
#'HEART','ADRENAL','LIVER','KIDNEY','SKIN','POA','BG','THAL','HIP','OLF''CB','CTX',
Brain_only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0) & (POA==1 | BG==1 | THAL==1 | HIP==1 | OLF==1 | CB==1 | CTX==1))
variant_list$Brain_only[variant_list$variant_list %in% rownames(Brain_only_variants)]<-1
variant_list$Brain_only[!variant_list$variant_list %in% rownames(Brain_only_variants)]<-0

CTX_only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & THAL==0 & HIP==0 & OLF==0 & CB==0) & CTX==1)
variant_list$CTX_only[variant_list$variant_list %in% rownames(CTX_only_variants)]<-1
variant_list$CTX_only[!variant_list$variant_list %in% rownames(CTX_only_variants)]<-0

only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0 & CTX==0) & (BG==1))
variant_list$BG_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$BG_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & HIP==0 & OLF==0 & CB==0 & CTX==0) & (THAL==1))
variant_list$THAL_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$THAL_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0)& (CTX==1 | BG==1))
variant_list$CTX_BG_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_BG_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & HIP==0 & OLF==0 & CB==0)& (CTX==1 & BG==1))
variant_list$CTX_BG_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_BG_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & BG==0 & HIP==0 & OLF==0 & CB==0)& (CTX==1 & THAL==1))
variant_list$CTX_THAL_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_THAL_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & CTX==0 & OLF==0 & CB==0)& (HIP==1 | BG==1))
# variant_list$BG_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$BG_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & BG==0 & OLF==0 & CB==0)& (CTX==1 | HIP==1))
# variant_list$CTX_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$CTX_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0



only_variants<-subset(variant_list, (L==1) & (R==0) & (Brain_only==1))
variant_list$Left_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Left_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (R==1) & (L==0) & (Brain_only==1))
variant_list$Right_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Right_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1))
variant_list$IN_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$IN_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (TBR1==0) & (COUPTF2==1) & (Brain_only==1))
variant_list$COUP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$COUP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1))
variant_list$TBR_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$TBR_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (TBR1==1) & (Brain_only==1))
variant_list$TBR_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$TBR_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (DLX1==1) & (Brain_only==1))
variant_list$DLX_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$DLX_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

In [5]:
print(paste("CTX_BG only variants: ",nrow(variant_list[variant_list$CTX_BG_only==1,])))
print(paste("CTX and THAL shared variants: ",nrow(variant_list[variant_list$CTX_THAL_shared==1,])))
print(paste("CTX and HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
print(paste("THAL and HIP shared variants: ",nrow(variant_list[variant_list$HIP_THAL_shared==1,])))

print(paste("CTX only variants: ",nrow(variant_list[variant_list$CTX_only==1,])))
print(paste("HIP only variants: ",nrow(variant_list[variant_list$HIP_only==1,])))
print(paste("THAL only variants: ",nrow(variant_list[variant_list$THAL_only==1,])))
print(paste("Left only variants: ",nrow(variant_list[variant_list$Left_only==1,])))
print(paste("Right only variants: ",nrow(variant_list[variant_list$Right_only==1,])))


[1] "CTX_BG only variants:  33"
[1] "CTX and THAL shared variants:  16"
[1] "CTX and HIP shared variants:  0"
[1] "THAL and HIP shared variants:  0"
[1] "CTX only variants:  33"
[1] "HIP only variants:  0"
[1] "THAL only variants:  4"
[1] "Left only variants:  30"
[1] "Right only variants:  36"


In [6]:
variant_list2 <- variant_list[,-1]
colnames(variant_list2)<- paste0(colnames(variant_list2), "_COUPTF2")
variant_list_bulk <- read.csv("./780_variant_annotation.csv",header=T,sep=",", row.names = 1)
variant_list2<-cbind(variant_list_bulk,variant_list2)

In [7]:
write.csv(variant_list2,"./780_variant_annotation_COUPTF2.csv")

# TBR1

In [8]:

print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))

filtered2 <- subset(filtered, Sort_main %in% c('1')) %>% subset(Cell_Type %in% c('TBR1'))
filtered2$Punch<-as.character(filtered2$Punch)
filtered2$Note<-as.character(filtered2$Note)

mosaic_filtered <- filtered2[filtered2$Mosaic_shared_btw_WGS_Amp==1,]
mosaic_filtered_cutoff <- subset(mosaic_filtered, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687)

print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))
for (i in 1:nrow(variant_list)){
    xx <- filtered2[filtered2$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
    xx <- subset(xx, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687 )
    n_row <- nrow(xx)
    variant_list$sample_number_presented[i] <- n_row
    }
print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", nrow(variant_list[variant_list$sample_number_presented>0,])))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ",nrow(variant_list[variant_list$sample_number_presented>1,])))

[1] "A total of variant calls:  353682"
[1] "The number of variants included in ampliseq:  2406"
[1] "The number of validated MVs:  780"
[1] "The number of validated MV calls:  1020"
[1] "The number of validated MVs (in the variant_list variable):  780"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  155"
[1] "The number of shared validated MVs :  133"


In [9]:
for (j in c('CTX','HIP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Organ)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('L','R')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Hemisphere)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('I','F','P','O','T','Hip')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Region)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

In [10]:
names(variant_list)
#'HEART','ADRENAL','LIVER','KIDNEY','SKIN','POA','BG','THAL','HIP','OLF''CB','CTX',
CTX_only_variants<-subset(variant_list, (HIP==0) & (CTX==1))
variant_list$CTX_only[variant_list$variant_list %in% rownames(CTX_only_variants)]<-1
variant_list$CTX_only[!variant_list$variant_list %in% rownames(CTX_only_variants)]<-0

only_variants<-subset(variant_list, (CTX==0) & (HIP==1))
variant_list$HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & CTX==0 & OLF==0 & CB==0)& (HIP==1 | BG==1))
# variant_list$BG_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$BG_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & BG==0 & OLF==0 & CB==0)& (CTX==1 | HIP==1))
# variant_list$CTX_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$CTX_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (L==1) & (R==0))
variant_list$Left_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Left_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (R==1) & (L==0))
variant_list$Right_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Right_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1))
# variant_list$IN_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$IN_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (COUPTF2==1) & (Brain_only==1))
# variant_list$COUP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$COUP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1))
# variant_list$TBR_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==1) & (Brain_only==1))
# variant_list$TBR_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==1) & (Brain_only==1))
# variant_list$DLX_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$DLX_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

In [11]:
#print(paste("CTX_BG only variants: ",nrow(variant_list[variant_list$CTX_BG_only==1,])))
#print(paste("CTX and THAL shared variants: ",nrow(variant_list[variant_list$CTX_THAL_shared==1,])))
#print(paste("CTX and HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
#print(paste("THAL and HIP shared variants: ",nrow(variant_list[variant_list$HIP_THAL_shared==1,])))

print(paste("CTX only variants: ",nrow(variant_list[variant_list$CTX_only==1,])))
print(paste("HIP only variants: ",nrow(variant_list[variant_list$HIP_only==1,])))
#print(paste("THAL only variants: ",nrow(variant_list[variant_list$THAL_only==1,])))
print(paste("Left only variants: ",nrow(variant_list[variant_list$Left_only==1,])))
print(paste("Right only variants: ",nrow(variant_list[variant_list$Right_only==1,])))


[1] "CTX only variants:  91"
[1] "HIP only variants:  1"
[1] "Left only variants:  33"
[1] "Right only variants:  38"


In [12]:
variant_list2 <- variant_list[,-1]
colnames(variant_list2)<- paste0(colnames(variant_list2), "_TBR1")
variant_list_bulk <- read.csv("./780_variant_annotation_COUPTF2.csv",header=T,sep=",", row.names = 1)
variant_list2<-cbind(variant_list_bulk,variant_list2)

In [13]:
write.csv(variant_list2,"./780_variant_annotation_COUPTF2_TBR1.csv")

# DLX1

In [14]:

print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))

filtered2 <- subset(filtered, Sort_main %in% c('1')) %>% subset(Cell_Type %in% c('DLX1'))
filtered2$Punch<-as.character(filtered2$Punch)
filtered2$Note<-as.character(filtered2$Note)

mosaic_filtered <- filtered2[filtered2$Mosaic_shared_btw_WGS_Amp==1,]
mosaic_filtered_cutoff <- subset(mosaic_filtered, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687)

print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))
for (i in 1:nrow(variant_list)){
    xx <- filtered2[filtered2$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
    xx <- subset(xx, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687 )
    n_row <- nrow(xx)
    variant_list$sample_number_presented[i] <- n_row
    }
print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", nrow(variant_list[variant_list$sample_number_presented>0,])))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ",nrow(variant_list[variant_list$sample_number_presented>1,])))

[1] "A total of variant calls:  353682"
[1] "The number of variants included in ampliseq:  2406"
[1] "The number of validated MVs:  780"
[1] "The number of validated MV calls:  1908"
[1] "The number of validated MVs (in the variant_list variable):  780"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  182"
[1] "The number of shared validated MVs :  159"


In [15]:
for (j in c('CTX','BG','THAL','HIP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Organ)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('L','R')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Hemisphere)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('I','F','P','O','T','Cau','GP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Region)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

In [16]:
names(variant_list)
#'HEART','ADRENAL','LIVER','KIDNEY','SKIN','POA','BG','THAL','HIP','OLF''CB','CTX',
CTX_only_variants<-subset(variant_list, (BG==0) & (THAL==0) & (HIP==0) & (CTX==1))
variant_list$CTX_only[variant_list$variant_list %in% rownames(CTX_only_variants)]<-1
variant_list$CTX_only[!variant_list$variant_list %in% rownames(CTX_only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (THAL==0) & (HIP==1) & (CTX==0))
variant_list$HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (THAL==1) & (HIP==0) & (CTX==0))
variant_list$THAL_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$THAL_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==1) & (THAL==0) & (HIP==0) & (CTX==0))
variant_list$BG_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$BG_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (THAL==0) & (HIP==1) & (CTX==1))
variant_list$CTX_HIP_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_HIP_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (HIP==0) & (THAL==1) & (CTX==1))
variant_list$CTX_THAL_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_THAL_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HIP==0) & (THAL==0) & (BG==1) & (CTX==1))
variant_list$CTX_BG_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_BG_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & CTX==0 & OLF==0 & CB==0)& (HIP==1 | BG==1))
# variant_list$BG_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$BG_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & BG==0 & OLF==0 & CB==0)& (CTX==1 | HIP==1))
# variant_list$CTX_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$CTX_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (L==1) & (R==0))
variant_list$Left_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Left_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (R==1) & (L==0))
variant_list$Right_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$Right_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1))
# variant_list$IN_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$IN_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (COUPTF2==1) & (Brain_only==1))
# variant_list$COUP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$COUP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1))
# variant_list$TBR_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==1) & (Brain_only==1))
# variant_list$TBR_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==1) & (Brain_only==1))
# variant_list$DLX_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$DLX_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

In [17]:
#print(paste("CTX_BG only variants: ",nrow(variant_list[variant_list$CTX_BG_only==1,])))
#print(paste("CTX and THAL shared variants: ",nrow(variant_list[variant_list$CTX_THAL_shared==1,])))
#print(paste("CTX and HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
#print(paste("THAL and HIP shared variants: ",nrow(variant_list[variant_list$HIP_THAL_shared==1,])))

print(paste("CTX only variants: ",nrow(variant_list[variant_list$CTX_only==1,])))
print(paste("HIP only variants: ",nrow(variant_list[variant_list$HIP_only==1,])))
print(paste("THAL only variants: ",nrow(variant_list[variant_list$THAL_only==1,])))
print(paste("BG only variants: ",nrow(variant_list[variant_list$BG_only==1,])))

print(paste("CTX HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
print(paste("CTX THAL shared variants",nrow(variant_list[variant_list$CTX_THAL_shared==1,])))
print(paste("CTX BG shared variants",nrow(variant_list[variant_list$CTX_BG_shared==1,])))

#print(paste("THAL only variants: ",nrow(variant_list[variant_list$THAL_only==1,])))
print(paste("Left only variants: ",nrow(variant_list[variant_list$Left_only==1,])))
print(paste("Right only variants: ",nrow(variant_list[variant_list$Right_only==1,])))


[1] "CTX only variants:  35"
[1] "HIP only variants:  0"
[1] "THAL only variants:  3"
[1] "BG only variants:  5"
[1] "CTX HIP shared variants:  3"
[1] "CTX THAL shared variants 1"
[1] "CTX BG shared variants 22"
[1] "Left only variants:  44"
[1] "Right only variants:  37"


In [18]:
variant_list2 <- variant_list[,-1]
colnames(variant_list2)<- paste0(colnames(variant_list2), "_DLX1")
variant_list_bulk <- read.csv("./780_variant_annotation_COUPTF2_TBR1.csv",header=T,sep=",", row.names = 1)
variant_list2<-cbind(variant_list_bulk,variant_list2)

In [19]:
write.csv(variant_list2,"./780_variant_annotation_COUPTF2_TBR1_DLX1.csv")

# CBH hypothesis

In [20]:

print(paste("A total of variant calls: ",nrow(filtered)))
print(paste("The number of variants included in ampliseq: ", length(unique(filtered$CHROM.POS.REF.ALT))))

filtered2 <- subset(filtered, CTX_BG_HIP_relationship %in% c('1'))
filtered2$Punch<-as.character(filtered2$Punch)
filtered2$Note<-as.character(filtered2$Note)

mosaic_filtered <- filtered2[filtered2$Mosaic_shared_btw_WGS_Amp==1,]
mosaic_filtered_cutoff <- subset(mosaic_filtered, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687)

print(paste("The number of validated MVs: ", length(unique(mosaic_filtered$CHROM.POS.REF.ALT))))
print(paste("The number of validated MV calls: ", length(mosaic_filtered_cutoff$CHROM.POS.REF.ALT)))

# statistics

variant_list<-unique(mosaic_filtered$CHROM.POS.REF.ALT)
variant_list<-as.data.frame(variant_list, row.names = variant_list) # assign rownames as CHROM.POS.REF.ALT
print(paste("The number of validated MVs (in the variant_list variable): ", nrow(variant_list)))
for (i in 1:nrow(variant_list)){
    xx <- filtered2[filtered2$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
    xx <- subset(xx, LOWER_CI>0.002360687 & UPPER_CI<0.4562841 & NORMAL_LOWER_CI<0.002360687 )
    n_row <- nrow(xx)
    variant_list$sample_number_presented[i] <- n_row
    }
print(paste("The number of validated MVs and detected in this 2023 Ampliseq: ", nrow(variant_list[variant_list$sample_number_presented>0,])))
# Non-private Mosaic variants
print(paste("The number of shared validated MVs : ",nrow(variant_list[variant_list$sample_number_presented>1,])))

[1] "A total of variant calls:  353682"
[1] "The number of variants included in ampliseq:  2406"
[1] "The number of validated MVs:  780"
[1] "The number of validated MV calls:  922"
[1] "The number of validated MVs (in the variant_list variable):  780"
[1] "The number of validated MVs and detected in this 2023 Ampliseq:  131"
[1] "The number of shared validated MVs :  115"


In [21]:
for (j in c('CTX','BG','HIP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Organ)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

for (j in c('I','F','P','O','mO','T','Cau','Put','GP')){
    for (i in 1:nrow(variant_list)){
        xx <- mosaic_filtered_cutoff[mosaic_filtered_cutoff$CHROM.POS.REF.ALT == variant_list$variant_list[i],]
        if(j %in% unique(xx$Region)){variant_list[i,j] <- 1}
        else{variant_list[i,j] <- 0}
        }
}

In [22]:
names(variant_list)
#'HEART','ADRENAL','LIVER','KIDNEY','SKIN','POA','BG','THAL','HIP','OLF''CB','CTX',
CTX_only_variants<-subset(variant_list, (BG==0) & (HIP==0) & (CTX==1))
variant_list$CTX_only[variant_list$variant_list %in% rownames(CTX_only_variants)]<-1
variant_list$CTX_only[!variant_list$variant_list %in% rownames(CTX_only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (HIP==1) & (CTX==0))
variant_list$HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==1) & (HIP==0) & (CTX==0))
variant_list$BG_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$BG_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==1) & (GP==1) & (Cau==0) & (Put==0) & (CTX==0) & (HIP==0))
variant_list$GP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$GP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (BG==0) & (HIP==1) & (CTX==1))
variant_list$CTX_HIP_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_HIP_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HIP==0) & (BG==1) & (CTX==1))
variant_list$CTX_BG_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$CTX_BG_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

only_variants<-subset(variant_list, (HIP==1) & (BG==1) & (CTX==0))
variant_list$BG_HIP_shared[variant_list$variant_list %in% rownames(only_variants)]<-1
variant_list$BG_HIP_shared[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & CTX==0 & OLF==0 & CB==0)& (HIP==1 | BG==1))
# variant_list$BG_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$BG_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (HEART==0 & ADRENAL==0 & LIVER==0 & KIDNEY==0 & SKIN==0 & POA==0 & THAL==0 & BG==0 & OLF==0 & CB==0)& (CTX==1 | HIP==1))
# variant_list$CTX_HIP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$CTX_HIP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (DLX1==1 | COUPTF2==1) & (Brain_only==1))
# variant_list$IN_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$IN_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==0) & (COUPTF2==1) & (Brain_only==1))
# variant_list$COUP_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$COUP_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==0 & COUPTF2==0) & (TBR1==1) & (Brain_only==1))
# variant_list$TBR_only[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_only[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (TBR1==1) & (Brain_only==1))
# variant_list$TBR_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$TBR_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

# only_variants<-subset(variant_list, (DLX1==1) & (Brain_only==1))
# variant_list$DLX_BR[variant_list$variant_list %in% rownames(only_variants)]<-1
# variant_list$DLX_BR[!variant_list$variant_list %in% rownames(only_variants)]<-0

In [23]:
#print(paste("CTX_BG only variants: ",nrow(variant_list[variant_list$CTX_BG_only==1,])))
#print(paste("CTX and THAL shared variants: ",nrow(variant_list[variant_list$CTX_THAL_shared==1,])))
#print(paste("CTX and HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
#print(paste("THAL and HIP shared variants: ",nrow(variant_list[variant_list$HIP_THAL_shared==1,])))

print(paste("CTX only variants: ",nrow(variant_list[variant_list$CTX_only==1,])))
print(paste("HIP only variants: ",nrow(variant_list[variant_list$HIP_only==1,])))
print(paste("BG only variants: ",nrow(variant_list[variant_list$BG_only==1,])))
print(paste("GP only variants: ",nrow(variant_list[variant_list$GP_only==1,])))

print(paste("CTX HIP shared variants: ",nrow(variant_list[variant_list$CTX_HIP_shared==1,])))
print(paste("CTX BG shared variants",nrow(variant_list[variant_list$CTX_BG_shared==1,])))
print(paste("BG HIP shared variants",nrow(variant_list[variant_list$BG_HIP_shared==1,])))


#print(paste("THAL only variants: ",nrow(variant_list[variant_list$THAL_only==1,])))
print(paste("Left only variants: ",nrow(variant_list[variant_list$Left_only==1,])))
print(paste("Right only variants: ",nrow(variant_list[variant_list$Right_only==1,])))


[1] "CTX only variants:  14"
[1] "HIP only variants:  4"
[1] "BG only variants:  10"
[1] "GP only variants:  5"
[1] "CTX HIP shared variants:  1"
[1] "CTX BG shared variants 43"
[1] "BG HIP shared variants 0"
[1] "Left only variants:  0"
[1] "Right only variants:  0"


In [24]:
variant_list2 <- variant_list[,-1]
colnames(variant_list2)<- paste0(colnames(variant_list2), "_CBH")
variant_list_bulk <- read.csv("./780_variant_annotation_COUPTF2_TBR1_DLX1.csv",header=T,sep=",", row.names = 1)
variant_list2<-cbind(variant_list_bulk,variant_list2)

In [25]:
write.csv(variant_list2,"./780_variant_annotation_COUPTF2_TBR1_DLX1_CBH.csv")