In [42]:
library(gridExtra)
library(matrixStats)
library(dplyr)
library(rlang)
library(reshape2)
library(clusterCrit)
library(grid)
library(ggrepel)
library(RColorBrewer)
library(dplyr)
library(IRdisplay)
library(scales)
library(survival)
library(corrplot)
library(Hmisc)
library(ggplot2)
library(dummies)
source('../../../src/tools.R')     # custom tools function
source('../../../src/hdp_tools_yanis.R')
source("../../../src/merge_df.R")
source("../../../src/my_toolbox.R")
source("../../../src/my_components.R")
source("../../../src/my_utils.R")
source("../../../src/ggstyles.R")
source("../../../src/my_hotspots.R")
###


# Use the minimal ggplot2 theme for every plot drawn from here on.
theme_set(theme_minimal())

# Jupyter notebook display parameters.
options(
  repr.plot.res = 100,        # medium-definition plot resolution (DPI)
  repr.matrix.max.rows = 200, # maximum number of rows displayed
  repr.matrix.max.cols = 200  # maximum number of columns displayed
)

# Notebook outline:
#   I.  Data frame locked without ITD stratification
#   II. Same, with ITD stratification

In [58]:
# Final table to use without ITD
#
# Pipeline: derive the 0/1 `full_component_*` indicators from
# "df_final_with_comp.tsv", join them (on row names) with the ELN table and the
# clinical correlates, impute missing covariates, resolve overlapping
# components, flag the remaining overlaps and write
# "df_final_full_component.tsv".

# Columns imputed with their mean; every other column with missing values is
# imputed with its median ("os" and "os_status" are never imputed).
continuous_correlates <- c("age", "wbc", "hb", "plt", "bm_blasts", "os")

# TRUE for each row of `data` where at least one of the columns `cols` equals 1.
fc_any <- function(data, cols) {
  unname(rowSums(data[, cols, drop = FALSE] == 1) > 0)
}

# Set each `targets` column to 0 on the rows where it equals 1 while any of
# the `blockers` columns equals 1. `blockers` must not contain any `targets`.
fc_drop_overlaps <- function(data, targets, blockers) {
  blocked <- fc_any(data, blockers)
  for (target in targets) {
    data[data[[target]] == 1 & blocked, target] <- 0
  }
  data
}

# Only the first 156 columns of the table are used.
df_final <- read.table("df_final_with_comp.tsv")[, 1:156]
tmp <- df_final
factors <- c(
  "ZRSR2", "U2AF1_p.S34", "U2AF1_p.Q157", "SRSF2", "SF3B1", "ASXL1",
  "STAG2", "BCOR", "RUNX1", "EZH2", "MLL", "PHF6", "SF1", "NF1", "CUX1",
  "SETBP1"
)
correlates <- c(
  "ahd", "perf_status", "bm_blasts", "secondary", "wbc", "hb", "plt",
  "gender", "age", "os", "os_status"
)

# Component indicators. They are appended to `tmp` in this exact order, which
# the positional column reordering further down depends on.
tmp$full_component_t_15_17 <- ifelse(df_final$t_15_17 == 1, 1, 0)
tmp$full_component_inv_16 <- ifelse(df_final$inv_16 == 1, 1, 0)
tmp$full_component_t_8_21 <- ifelse(df_final$t_8_21 == 1, 1, 0)
tmp$full_component_t_11 <- ifelse(
  df_final$t_v_11 == 1 | df_final$t_9_11 == 1, 1, 0
)
tmp$full_component_t_6_9 <- ifelse(df_final$t_6_9 == 1, 1, 0)
tmp$full_component_inv_3 <- ifelse(df_final$inv_3 == 1, 1, 0)
tmp$full_component_additions <- ifelse(
  df_final$add_8 == 1 | df_final$add_13 == 1 |
    df_final$add_21 == 1 | df_final$add_22 == 1,
  1, 0
)
tmp$full_component_TP53_complex <- ifelse(
  df_final$TP53 == 1 | df_final$complex == 1, 1, 0
)
tmp$full_component_NPM1 <- ifelse(df_final$NPM1 == 1, 1, 0)
tmp$full_component_CEBPA_bi <- ifelse(df_final$CEBPA_bi == 1, 1, 0)
tmp$full_component_DNMT3A_IDH1_2 <- ifelse(
  df_final$DNMT3A == 1 &
    (df_final$IDH1 == 1 | df_final$IDH2_p.R140 == 1 |
       df_final$IDH2_p.R172 == 1),
  1, 0
)
tmp$full_component_WT1 <- ifelse(df_final$WT1 == 1, 1, 0)

# Row sums over the `factors` columns, computed once and used for both the
# "multiple" (> 1) and the "exactly one" (== 1) indicators.
n_factor_hits <- rowSums(df_final[, factors])
tmp$full_component_chr_splicing_multiple <- ifelse(n_factor_hits > 1, 1, 0)
tmp$full_component_chr_splicing_1 <- ifelse(n_factor_hits == 1, 1, 0)
tmp$full_component_not_assigned <- ifelse(
  df_final$final_component == "not_assigned", 1, 0
)
tmp$full_component_no_events <- ifelse(
  df_final$final_component == "no_events", 1, 0
)

# Clinical correlates: rows are keyed by `data_pd`; the first four columns are
# dropped.
df_initial <- read.table(
  "../../../data/initial_dataset/Master_04_10_2019.csv",
  sep = ",", header = TRUE
)
rownames(df_initial) <- df_initial$data_pd
df_initial <- df_initial[, -(1:4)]

# ELN table: rows are keyed by its first column `X`, which is then dropped.
df_eln <- read.table(
  "../../../data/updated_dataset/eln_final.tsv",
  sep = "\t", header = TRUE
)
rownames(df_eln) <- df_eln$X
df_eln <- df_eln[-1]

# Join on row names; merge() returns the key as a leading `Row.names` column,
# which is moved back into the row names after each join.
df <- merge(df_eln, tmp, by = "row.names")
rownames(df) <- df$Row.names
df <- df[-1]
df <- merge(df, df_initial[, correlates], by = "row.names")
rownames(df) <- df$Row.names
df <- df[-1]

# Impute missing values everywhere except in "os" / "os_status".
for (col in colnames(df)[colSums(is.na(df)) > 0]) {
  if (col %in% c("os", "os_status")) {
    next
  }
  if (col %in% continuous_correlates) {
    df[, col][is.na(df[, col])] <- mean(df[, col], na.rm = TRUE)
  } else {
    df[, col][is.na(df[, col])] <- median(df[, col], na.rm = TRUE)
  }
}

# Drop the rows that still contain missing values, then keep only os > 0.
df <- na.omit(df)
df <- df[df$os > 0, ]

# Components shared by every overlap rule below.
core_components <- c(
  "full_component_t_15_17", "full_component_inv_16",
  "full_component_t_8_21", "full_component_t_11",
  "full_component_t_6_9", "full_component_inv_3",
  "full_component_NPM1", "full_component_CEBPA_bi"
)
deletion_cols <- c(
  "del_7", "del_17", "del_5", "del_9", "del_13", "del_20", "del_18",
  "del_16", "del_12", "del_3", "minusy"
)

# Handling additions
df <- fc_drop_overlaps(
  df, "full_component_additions",
  c(core_components, "full_component_DNMT3A_IDH1_2", "full_component_WT1")
)
# Additions overlapping TP53_complex are reset to 0 unless at least one of the
# deletion columns is set.
df[
  df$full_component_additions == 1 &
    df$full_component_TP53_complex == 1 &
    !fc_any(df, deletion_cols),
  "full_component_additions"
] <- 0

# Handling DNMT3A IDH (uses the already-resolved additions column)
df <- fc_drop_overlaps(
  df, "full_component_DNMT3A_IDH1_2",
  c(core_components, "full_component_TP53_complex", "full_component_additions")
)

# Handling WT1
df <- fc_drop_overlaps(
  df, "full_component_WT1",
  c(core_components, "full_component_TP53_complex")
)

# Handling chr_splicing_1
df <- fc_drop_overlaps(
  df, "full_component_chr_splicing_1",
  c(core_components, "full_component_TP53_complex")
)

# overlap = 1 when more than one `full_component_*` column is still set.
df$overlap <- 0
is_component <- startsWith(colnames(df), "full_component_")
df[rowSums(df[, is_component, drop = FALSE]) > 1, "overlap"] <- 1

# Positional reorder. The indices form a permutation of 1:188, so this
# presumably expects exactly 188 columns at this point -- TODO confirm, and
# consider reordering by column name instead.
df <- df[, c(1:158, 177:185, 159:176, 188, 186:187)]

# Recode eln_2017: "adverse" -> 1, "intermediate" -> 2, anything else -> 3.
df$eln_2017 <- ifelse(
  df$eln_2017 == "adverse", 1,
  ifelse(df$eln_2017 == "intermediate", 2, 3)
)
write.table(df, "df_final_full_component.tsv")

In [59]:
# With ITD
#
# Same pipeline as the table without ITD, except that several components are
# split into a variant with `ITD == 0` and an `_ITD` variant with `ITD == 1`.
# Derives the 0/1 `full_component_*` indicators from "df_final_with_comp.tsv",
# joins them (on row names) with the ELN table and the clinical correlates,
# imputes missing covariates, resolves overlapping components, flags the
# remaining overlaps and writes "df_final_full_component_ITD.tsv".

# Columns imputed with their mean; every other column with missing values is
# imputed with its median ("os" and "os_status" are never imputed).
continuous_correlates <- c("age", "wbc", "hb", "plt", "bm_blasts", "os")

# TRUE for each row of `data` where at least one of the columns `cols` equals 1.
fc_any <- function(data, cols) {
  unname(rowSums(data[, cols, drop = FALSE] == 1) > 0)
}

# Set each `targets` column to 0 on the rows where it equals 1 while any of
# the `blockers` columns equals 1. `blockers` must not contain any `targets`.
fc_drop_overlaps <- function(data, targets, blockers) {
  blocked <- fc_any(data, blockers)
  for (target in targets) {
    data[data[[target]] == 1 & blocked, target] <- 0
  }
  data
}

# Only the first 156 columns of the table are used.
df_final <- read.table("df_final_with_comp.tsv")[, 1:156]
tmp <- df_final
factors <- c(
  "ZRSR2", "U2AF1_p.S34", "U2AF1_p.Q157", "SRSF2", "SF3B1", "ASXL1",
  "STAG2", "BCOR", "RUNX1", "EZH2", "MLL", "PHF6", "SF1", "NF1", "CUX1",
  "SETBP1"
)
correlates <- c(
  "ahd", "perf_status", "bm_blasts", "secondary", "wbc", "hb", "plt",
  "gender", "age", "os", "os_status"
)

# Component indicators. They are appended to `tmp` in this exact order, which
# the positional column reordering further down depends on.
tmp$full_component_t_15_17 <- ifelse(df_final$t_15_17 == 1, 1, 0)

tmp$full_component_inv_16 <- ifelse(df_final$inv_16 == 1, 1, 0)

tmp$full_component_t_8_21 <- ifelse(
  df_final$t_8_21 == 1 & df_final$ITD == 0, 1, 0
)
tmp$full_component_t_8_21_ITD <- ifelse(
  df_final$t_8_21 == 1 & df_final$ITD == 1, 1, 0
)

tmp$full_component_t_11 <- ifelse(
  df_final$t_v_11 == 1 | df_final$t_9_11 == 1, 1, 0
)

tmp$full_component_t_6_9 <- ifelse(
  df_final$t_6_9 == 1 & df_final$ITD == 0, 1, 0
)
tmp$full_component_t_6_9_ITD <- ifelse(
  df_final$t_6_9 == 1 & df_final$ITD == 1, 1, 0
)

tmp$full_component_inv_3 <- ifelse(df_final$inv_3 == 1, 1, 0)

tmp$full_component_additions <- ifelse(
  df_final$add_8 == 1 | df_final$add_13 == 1 |
    df_final$add_21 == 1 | df_final$add_22 == 1,
  1, 0
)

tmp$full_component_TP53_complex <- ifelse(
  df_final$TP53 == 1 | df_final$complex == 1, 1, 0
)

tmp$full_component_NPM1 <- ifelse(
  df_final$NPM1 == 1 & df_final$ITD == 0, 1, 0
)
tmp$full_component_NPM1_ITD <- ifelse(
  df_final$NPM1 == 1 & df_final$ITD == 1, 1, 0
)

tmp$full_component_CEBPA_bi <- ifelse(
  df_final$CEBPA_bi == 1 & df_final$ITD == 0, 1, 0
)
tmp$full_component_CEBPA_bi_ITD <- ifelse(
  df_final$CEBPA_bi == 1 & df_final$ITD == 1, 1, 0
)

tmp$full_component_DNMT3A_IDH1_2 <- ifelse(
  df_final$DNMT3A == 1 &
    (df_final$IDH1 == 1 | df_final$IDH2_p.R140 == 1 |
       df_final$IDH2_p.R172 == 1) &
    df_final$ITD == 0,
  1, 0
)
tmp$full_component_DNMT3A_IDH1_2_ITD <- ifelse(
  df_final$DNMT3A == 1 &
    (df_final$IDH1 == 1 | df_final$IDH2_p.R140 == 1 |
       df_final$IDH2_p.R172 == 1) &
    df_final$ITD == 1,
  1, 0
)

tmp$full_component_WT1 <- ifelse(
  df_final$WT1 == 1 & df_final$ITD == 0, 1, 0
)
tmp$full_component_WT1_ITD <- ifelse(
  df_final$WT1 == 1 & df_final$ITD == 1, 1, 0
)

# Row sums over the `factors` columns, computed once and used for both the
# "multiple" (> 1) and the "exactly one" (== 1) indicators.
n_factor_hits <- rowSums(df_final[, factors])
tmp$full_component_chr_splicing_multiple <- ifelse(n_factor_hits > 1, 1, 0)

tmp$full_component_chr_splicing_1 <- ifelse(n_factor_hits == 1, 1, 0)

tmp$full_component_not_assigned <- ifelse(
  df_final$final_component == "not_assigned" & df_final$ITD == 0, 1, 0
)
tmp$full_component_not_assigned_ITD <- ifelse(
  df_final$final_component == "not_assigned" & df_final$ITD == 1, 1, 0
)

tmp$full_component_no_events <- ifelse(
  df_final$final_component == "no_events", 1, 0
)

# Clinical correlates: rows are keyed by `data_pd`; the first four columns are
# dropped.
df_initial <- read.table(
  "../../../data/initial_dataset/Master_04_10_2019.csv",
  sep = ",", header = TRUE
)
rownames(df_initial) <- df_initial$data_pd
df_initial <- df_initial[, -(1:4)]

# ELN table: rows are keyed by its first column `X`, which is then dropped.
df_eln <- read.table(
  "../../../data/updated_dataset/eln_final.tsv",
  sep = "\t", header = TRUE
)
rownames(df_eln) <- df_eln$X
df_eln <- df_eln[-1]

# Join on row names; merge() returns the key as a leading `Row.names` column,
# which is moved back into the row names after each join.
df <- merge(df_eln, tmp, by = "row.names")
rownames(df) <- df$Row.names
df <- df[-1]
df <- merge(df, df_initial[, correlates], by = "row.names")
rownames(df) <- df$Row.names
df <- df[-1]

# Impute missing values everywhere except in "os" / "os_status".
for (col in colnames(df)[colSums(is.na(df)) > 0]) {
  if (col %in% c("os", "os_status")) {
    next
  }
  if (col %in% continuous_correlates) {
    df[, col][is.na(df[, col])] <- mean(df[, col], na.rm = TRUE)
  } else {
    df[, col][is.na(df[, col])] <- median(df[, col], na.rm = TRUE)
  }
}

# Drop the rows that still contain missing values, then keep only os > 0.
df <- na.omit(df)
df <- df[df$os > 0, ]

# Components shared by every overlap rule below.
core_components <- c(
  "full_component_t_15_17", "full_component_inv_16",
  "full_component_t_8_21", "full_component_t_8_21_ITD",
  "full_component_t_11",
  "full_component_t_6_9", "full_component_t_6_9_ITD",
  "full_component_inv_3",
  "full_component_NPM1", "full_component_NPM1_ITD",
  "full_component_CEBPA_bi", "full_component_CEBPA_bi_ITD"
)
deletion_cols <- c(
  "del_7", "del_17", "del_5", "del_9", "del_13", "del_20", "del_18",
  "del_16", "del_12", "del_3", "minusy"
)

# Handling additions   (for overlap with tp53 we want at least one deletion)
df <- fc_drop_overlaps(
  df, "full_component_additions",
  c(
    core_components,
    "full_component_DNMT3A_IDH1_2", "full_component_DNMT3A_IDH1_2_ITD",
    "full_component_WT1", "full_component_WT1_ITD"
  )
)
# Additions overlapping TP53_complex are reset to 0 unless at least one of the
# deletion columns is set.
df[
  df$full_component_additions == 1 &
    df$full_component_TP53_complex == 1 &
    !fc_any(df, deletion_cols),
  "full_component_additions"
] <- 0

# Handling DNMT3A IDH (uses the already-resolved additions column)
df <- fc_drop_overlaps(
  df,
  c("full_component_DNMT3A_IDH1_2", "full_component_DNMT3A_IDH1_2_ITD"),
  c(core_components, "full_component_TP53_complex", "full_component_additions")
)

# Handling WT1
df <- fc_drop_overlaps(
  df,
  c("full_component_WT1", "full_component_WT1_ITD"),
  c(core_components, "full_component_TP53_complex")
)

# Handling chr_splicing_1
df <- fc_drop_overlaps(
  df, "full_component_chr_splicing_1",
  c(core_components, "full_component_TP53_complex")
)

# overlap = 1 when more than one `full_component_*` column is still set.
df$overlap <- 0
is_component <- startsWith(colnames(df), "full_component_")
df[rowSums(df[, is_component, drop = FALSE]) > 1, "overlap"] <- 1

# Positional reorder. The indices form a permutation of 1:195, so this
# presumably expects exactly 195 columns at this point -- TODO confirm, and
# consider reordering by column name instead.
df <- df[, c(1:158, 184:192, 159:183, 195, 193:194)]

# Recode eln_2017: "adverse" -> 1, "intermediate" -> 2, anything else -> 3.
df$eln_2017 <- ifelse(
  df$eln_2017 == "adverse", 1,
  ifelse(df$eln_2017 == "intermediate", 2, 3)
)
write.table(df, "df_final_full_component_ITD.tsv")

In [57]:
# Count the rows with and without the additions component.
table(df[["full_component_additions"]])


   0    1 
1933  192 