In [84]:
library(dplyr)
library(miceadds)

In [89]:
# Load the validation cohort table (path is relative to this notebook).
df_nejm <- read.table("../Validation/full_data_validation_NO_ITD.tsv")

# Use the same clinical column names as in the Cardiff data.
df_nejm <- dplyr::rename(
  df_nejm,
  OS_stat = OS_Status,
  gender = Gender,
  age = Age,
  hb = HB,
  bm_blasts = BM_Blasts,
  plt = PLT,
  wbc = WBC
)

# Add CR / relapse times and the relapse status from AMLSG_Clinical.
# load.Rdata() makes the content of the .RData file available as `val_data`.
load.Rdata("../Validation/AMLSG_Clinical_Anon.RData", "val_data")
rownames(val_data) <- val_data$PDID

# Join on row names (equivalent to by = 0). The join key is returned in a
# "Row.names" column, and only rows present in both tables are kept.
df_nejm <- merge(
  df_nejm,
  val_data[, c("PDID", "CR_date", "rfs", "rfsstat")],
  by = "row.names"
)
df_nejm <- dplyr::rename(
  df_nejm,
  CR = CR_date,
  Relapse = rfs,
  Relapse_stat = rfsstat
)

# Rescale both times by 365 (presumably days -> years; confirm the unit of
# `rfs` against the clinical data dictionary).
df_nejm[["CR"]] <- as.numeric(df_nejm[["CR"]]) / 365
df_nejm[["Relapse"]] <- df_nejm[["Relapse"]] / 365



# ---- CR and relapse status ----

# CR_stat is 1 only when a CR time is recorded and it does not exceed OS;
# a missing CR, or a CR later than OS, gives 0.
cr_reached <- !is.na(df_nejm$CR) & df_nejm$CR <= df_nejm$OS
df_nejm$CR_stat <- ifelse(cr_reached, 1, 0)

# Relapse_stat from the clinical data is kept only when both CR and Relapse
# are recorded and CR + Relapse is strictly lower than OS (compared after
# rounding to 5 decimals). It is forced to 0 otherwise:
#   * rfs is either relapse or OS, so CR + Relapse equal to OS means the
#     recorded event is OS, not a relapse;
#   * a patient needs a CR before being able to relapse.
relapse_not_observed <- is.na(df_nejm$CR) | is.na(df_nejm$Relapse) |
  round(df_nejm$CR + df_nejm$Relapse, 5) >= round(df_nejm$OS, 5)
df_nejm[relapse_not_observed, "Relapse_stat"] <- 0

# Patients without a CR event get their CR time replaced by OS.
cr_censored <- df_nejm$CR_stat == 0
df_nejm[cr_censored, "CR"] <- df_nejm[cr_censored, "OS"]

# Relapse time: OS for patients without a relapse event, and CR + Relapse
# for patients with one.
relapse_censored <- df_nejm$Relapse_stat == 0
relapse_observed <- df_nejm$Relapse_stat == 1
df_nejm[relapse_censored, "Relapse"] <- df_nejm[relapse_censored, "OS"]
df_nejm[relapse_observed, "Relapse"] <-
  df_nejm[relapse_observed, "CR"] + df_nejm[relapse_observed, "Relapse"]

# Death endpoints: each one uses OS as its time; the status says in which
# state (in CR, after relapse, or without CR/relapse) the death occurred.
death_observed <- df_nejm$OS_stat == 1
cr_event <- df_nejm$CR_stat == 1
no_cr_event <- df_nejm$CR_stat == 0
relapse_event <- df_nejm$Relapse_stat == 1
no_relapse_event <- df_nejm$Relapse_stat == 0

df_nejm$Death_in_CR <- df_nejm$OS
df_nejm$Death_in_CR_stat <-
  ifelse(cr_event & no_relapse_event & death_observed, 1, 0)

df_nejm$Death_in_Relapse <- df_nejm$OS
df_nejm$Death_in_Relapse_stat <-
  ifelse(cr_event & relapse_event & death_observed, 1, 0)

df_nejm$Death_without <- df_nejm$OS
df_nejm$Death_without_stat <-
  ifelse(no_cr_event & no_relapse_event & death_observed, 1, 0)


# ---- New molecular classification proposal ----

# Every patient starts as "none". The rules below are applied in order, and a
# rule only labels patients that are still "none", so earlier rules take
# precedence over later ones.
df_nejm$molecular_classification <- "none"

classification_rules <- list(
  list(
    label = "NEW_favorable",
    hit = df_nejm$full_component_NPM1 == 1 |
      df_nejm$full_component_inv_16 == 1 |
      df_nejm$full_component_t_8_21 == 1 |
      df_nejm$full_component_t_15_17 == 1
  ),
  list(
    label = "NEW_intermediate",
    hit = df_nejm$full_component_chr_splicing_1 == 1 |
      df_nejm$full_component_t_6_9 == 1
  ),
  list(
    label = "NEW_adverse",
    hit = (df_nejm$full_component_additions == 1 & df_nejm$overlap == 1) |
      df_nejm$full_component_chr_splicing_multiple == 1 |
      df_nejm$full_component_TP53_complex == 1 |
      df_nejm$full_component_inv_3 == 1
  ),
  list(
    label = "NEW_intermediate",
    hit = (df_nejm$full_component_additions == 1 & df_nejm$overlap == 0) |
      df_nejm$full_component_t_11 == 1 |
      df_nejm$full_component_WT1 == 1 |
      df_nejm$full_component_DNMT3A_IDH1_2 == 1 |
      df_nejm$full_component_not_assigned == 1
  ),
  list(
    label = "NEW_favorable",
    hit = df_nejm$full_component_CEBPA_bi == 1 |
      df_nejm$full_component_no_events == 1
  )
)

for (rule in classification_rules) {
  still_unlabelled <- df_nejm$molecular_classification == "none"
  df_nejm[rule[["hit"]] & still_unlabelled, "molecular_classification"] <-
    rule[["label"]]
}

# ITD moves a patient exactly one risk group up: intermediate -> adverse and
# favorable -> intermediate. Both masks are taken before any relabelling so
# that a favorable patient is not upgraded twice.
itd_positive <- df_nejm$ITD == 1
intermediate_with_itd <-
  df_nejm$molecular_classification == "NEW_intermediate" & itd_positive
favorable_with_itd <-
  df_nejm$molecular_classification == "NEW_favorable" & itd_positive
df_nejm[intermediate_with_itd, "molecular_classification"] <- "NEW_adverse"
df_nejm[favorable_with_itd, "molecular_classification"] <- "NEW_intermediate"

# One 0/1 indicator column per risk group.
for (risk_group in c("NEW_favorable", "NEW_intermediate", "NEW_adverse")) {
  df_nejm[[risk_group]] <-
    ifelse(df_nejm$molecular_classification == risk_group, 1, 0)
}
table(df_nejm$molecular_classification)

# Put the merge key back as row names and drop the helper column.
row.names(df_nejm) <- df_nejm[["Row.names"]]
df_nejm[["Row.names"]] <- NULL




     NEW_adverse    NEW_favorable NEW_intermediate 
             356              372              448 

In [90]:
# Attach the transplant columns of the clinical data to the cohort table,
# matching patients on row names (the key comes back as "Row.names").
nejm_data_tpl <- merge(
  df_nejm,
  val_data[, c("TPL_date", "TPL_o")],
  by = "row.names"
)

# Put the transplant time on the same /365 scale as the CR time.
nejm_data_tpl[["TPL_date"]] <- as.numeric(nejm_data_tpl[["TPL_date"]]) / 365
head(nejm_data_tpl)

Row.names,NPM1,CEBPA_bi,ITD,ASXL1,ATRX,BCOR,BRAF,CBL,CBLB,⋯,Death_in_Relapse,Death_in_Relapse_stat,Death_without,Death_without_stat,molecular_classification,NEW_favorable,NEW_intermediate,NEW_adverse,TPL_date,TPL_o
PD10790a,0,0,0,1,0,0,0,0,0,⋯,0.7589041,0,0.7589041,0,NEW_intermediate,0,1,0,0.2054795,1
PD10793a,0,0,0,0,0,0,0,0,0,⋯,4.7150685,0,4.7150685,0,NEW_adverse,0,0,1,0.2356164,1
PD10794a,1,0,0,0,0,0,0,0,0,⋯,5.6739726,0,5.6739726,0,NEW_favorable,1,0,0,0.2684932,1
PD10795a,1,0,1,0,0,0,0,0,0,⋯,4.509589,0,4.509589,0,NEW_intermediate,0,1,0,,0
PD10796a,0,1,0,0,0,0,0,0,0,⋯,2.8520548,0,2.8520548,0,NEW_favorable,1,0,0,0.3671233,1
PD10798a,1,0,0,0,0,0,0,0,0,⋯,4.4931507,1,4.4931507,0,NEW_favorable,1,0,0,2.7287671,1


In [91]:
# Number of transplanted patients whose transplant time is later than their
# Relapse time (Relapse holds OS for patients without a relapse event).
tpl_later_than_relapse <- with(
  nejm_data_tpl,
  !is.na(TPL_date) & TPL_date > Relapse
)
nrow(nejm_data_tpl[
  tpl_later_than_relapse,
  c("OS", "CR", "Relapse", "TPL_date", "OS_stat", "Relapse_stat", "CR_stat")
])

In [130]:
# Break the transplanted patients down by the order of CR, transplant and
# relapse. Each count is the number of rows of `nejm_data_tpl` matching a mask.
#
# Changes compared with the previous version of this cell:
#   * "Subcase3 : No Relapse" now requires CR_stat == 1, like its parent
#     Case 1. Patients without a CR event have CR set to OS and could
#     otherwise be counted in the subcase without being part of the case.
#   * The third section is labelled "Case 3" (two sections were both
#     "Case 2"), and the garbled "TrCR" label is spelled out.
tplc_has_tpl <- !is.na(nejm_data_tpl$TPL_date)
tplc_cr_event <- nejm_data_tpl$CR_stat == 1
tplc_relapse_event <- nejm_data_tpl$Relapse_stat == 1
tplc_no_relapse <- nejm_data_tpl$Relapse_stat == 0

tplc_cr_then_tpl <- tplc_has_tpl & tplc_cr_event &
  nejm_data_tpl$CR < nejm_data_tpl$TPL_date
tplc_tpl_then_cr <- tplc_has_tpl & tplc_cr_event &
  nejm_data_tpl$TPL_date < nejm_data_tpl$CR

print("Total cases:")
nrow(nejm_data_tpl[tplc_has_tpl, ])


print("Case 1 : CR before  transplant:")
nrow(nejm_data_tpl[tplc_cr_then_tpl, ])

# Relapse_stat == 1 is only kept for patients with CR + Relapse before OS,
# so these two subcases already imply CR_stat == 1.
print("Subcase1 : Transplant before Relapse:")
nrow(nejm_data_tpl[
  tplc_cr_then_tpl & tplc_relapse_event &
    nejm_data_tpl$TPL_date < nejm_data_tpl$Relapse,
])
print("Subcase2 : Transplant after Relapse:")
nrow(nejm_data_tpl[
  tplc_cr_then_tpl & tplc_relapse_event &
    nejm_data_tpl$TPL_date > nejm_data_tpl$Relapse,
])
print("Subcase3 : No Relapse:")
nrow(nejm_data_tpl[tplc_cr_then_tpl & tplc_no_relapse, ])

# Exact equality is intended here: CR and TPL_date are both computed as
# as.numeric(<time>) / 365, so identical source values give identical
# doubles.
print("Case 2 : CR = transplant:")
nejm_data_tpl[
  tplc_has_tpl & nejm_data_tpl$CR == nejm_data_tpl$TPL_date,
  c("OS", "CR", "Relapse", "TPL_date", "OS_stat", "Relapse_stat", "CR_stat")
]

print("Case 3 : Transplant before CR:")
nrow(nejm_data_tpl[tplc_tpl_then_cr, ])
print("Subcase 1: Relapse")
nrow(nejm_data_tpl[tplc_tpl_then_cr & tplc_relapse_event, ])
print("Subcase 2: No Relapse")
nrow(nejm_data_tpl[tplc_tpl_then_cr & tplc_no_relapse, ])




[1] "Total cases:"


[1] "Case 1 : CR before  transplant:"


[1] "Subcase1 : Transplant before Relapse:"


[1] "Subcase2 : Transplant after Relapse:"


[1] "Subcase3 : No Relapse:"


[1] "Case 2 : TrCR = transplant:"


Unnamed: 0,OS,CR,Relapse,TPL_date,OS_stat,Relapse_stat,CR_stat
154,0.6575342,0.2931507,0.6575342,0.2931507,1,0,1
174,0.430137,0.1753425,0.430137,0.1753425,1,0,1
232,2.7041096,0.4410959,1.4164384,0.4410959,1,1,1
1128,5.0109589,0.3479452,5.0109589,0.3479452,0,0,1


[1] "Case 2 : Transplant before CR:"


[1] "Subcase 1: Relapse"


[1] "Subcase 2: No Relapse"


In [57]:
# Patients of the clinical data whose transplant date is earlier than their
# CR date; which() drops the rows where the comparison is NA.
tpl_earlier_rows <- which(val_data$TPL_date < val_data$CR_date)

# Transplant / CR details for those patients.
val_data[
  tpl_earlier_rows,
  c(
    "Time_1CR_TPL", "TPL_date", "CR_date", "TPL_type",
    "BaselineTherapy_", "Post_Induction_Therapy_Remission"
  )
]

# Distinct Time_1CR_TPL values observed among them.
unique(val_data[tpl_earlier_rows, "Time_1CR_TPL"])



Unnamed: 0,Time_1CR_TPL,TPL_date,CR_date,TPL_type,BaselineTherapy_,Post_Induction_Therapy_Remission
PD10790a,,75 days,179 days,FREMD,1,RD
PD10803a,,91 days,111 days,ALLO,1,RD
PD10804a,,141 days,155 days,FREMD,1,RD
PD10805a,,68 days,98 days,ALLO,1,RD
PD10808a,,174 days,204 days,ALLO,1,RD
PD10810a,,96 days,124 days,FREMD,1,RD
PD10814a,,90 days,187 days,ALLO,1,RD
PD10820a,,110 days,141 days,TPL_(Spenderart_unbekannt),1,CR
PD10821a,,75 days,130 days,ALLO,1,RD
PD10826a,,70 days,93 days,FREMD,1,RD


In [39]:
# List the column names available in the clinical data frame.
names(val_data)