*For demonstration, data from batch 1 is used*

In [None]:
library(survival)
library(ggsurvfit)

1. Import clinical features

In [None]:
# Load the batch 1 clinical table; the first CSV column supplies the row names.
clin1 <- read.csv(
  "/localhome/bs22tmhn/[ResearchProject]/ClinData_batch1.csv",
  row.names = 1
)

# Drop cases without clinical data, i.e. rows whose Donor_ID is missing.
clin1 <- clin1[!is.na(clin1$Donor_ID), ]

# Recode status to 0 for every case with NHLDeath == 2.
# which() selects only the rows where the comparison is TRUE, so rows with a
# missing NHLDeath are left untouched.
clin1$status[which(clin1$NHLDeath == 2)] <- 0

2. Clinical feature modification

In [None]:
# Derive the binary clinical indicators and the FLIPI score from raw columns.

# Age_greaterthan_60: 1 when AgeDiagnosis is above 60, otherwise 0.
clin1$Age_greaterthan_60 <- ifelse(clin1$AgeDiagnosis > 60, 1, 0)

# stage34: 1 for Stage1 of 3 or 4, 0 for Stage1 of 1 or 2, NA for anything
# else (including a missing Stage1).
clin1$stage34 <- ifelse(
  clin1$Stage1 %in% c(3, 4),
  1,
  ifelse(clin1$Stage1 %in% c(1, 2), 0, NA)
)

# hb_lessthan_12: a recorded hb of 0 is mapped to NA; otherwise 1 when hb is
# below 12 and 0 when it is not.
clin1$hb_lessthan_12 <- ifelse(
  clin1$hb == 0,
  NA,
  ifelse(clin1$hb < 12, 1, 0)
)

# ldh: ldh1 code 1 becomes 0, code 2 becomes 1, every other value becomes NA.
clin1$ldh <- ifelse(
  clin1$ldh1 == 1,
  0,
  ifelse(clin1$ldh1 == 2, 1, NA)
)

# Bsymps: 1 when any of sweats1, fever1 or wt_loss1 equals 1; NA when that
# cannot be decided because of missing values; otherwise 0.
clin1$Bsymps <- ifelse(
  clin1$sweats1 == 1 | clin1$fever1 == 1 | clin1$wt_loss1 == 1,
  1,
  ifelse(
    is.na(clin1$sweats1) | is.na(clin1$fever1) | is.na(clin1$wt_loss1),
    NA,
    0
  )
)

# FLIPI: row-wise sum of the five component indicators, NA as soon as one of
# the components is missing.
flipi_components <- c(
  "Age_greaterthan_60", "stage34", "hb_lessthan_12", "ldh", "nodal_sites1"
)
clin1$FLIPI <- ifelse(
  complete.cases(clin1[, flipi_components]),
  rowSums(clin1[, flipi_components]),
  NA
)

# FLIPIscore: FLIPI of 0 or 1 -> 1, FLIPI of 2 -> 2, any other value -> 3
# (a missing FLIPI stays NA).
clin1$FLIPIscore <- ifelse(
  clin1$FLIPI %in% c(0, 1),
  1,
  ifelse(clin1$FLIPI == 2, 2, 3)
)

3. Kaplan-Meier plots of each clinical feature for each event type

- Overall survival (OS)

In [None]:
# Kaplan-Meier curves for overall survival (OS): one plot per clinical
# feature, each with a confidence band and the p-value from add_pvalue().

# Columns to stratify by, in plotting order.
os_features <- c(
  "Sex1",                # Sex
  "Age_greaterthan_60",  # Age
  "stage34",             # Stage
  "FLManagement",        # Firstline Management
  "hb_lessthan_12",      # Hb
  "ldh",                 # ldh
  "nodal_sites1",        # nodal sites
  "Bsymps",              # B symptoms
  "FLIPIscore"           # FLIPI
)

# Only cases with a non-missing status are used for the OS plots.
os_data <- clin1[!is.na(clin1$status), ]

# Build the OS Kaplan-Meier plot for one stratifying column.
#   feature: name of the column in `data` to stratify by (character scalar).
#   data:    data frame containing OS_time, status and the feature column.
# Returns the ggplot object produced by ggsurvfit().
plot_os_km <- function(feature, data) {
  km_formula <- as.formula(paste("Surv(OS_time, status) ~", feature))
  km_fit <- survfit2(formula = km_formula, data = data)
  ggsurvfit(km_fit) +
    labs(
      x = "Years",
      y = "OS"
    ) +
    add_confidence_interval() +
    add_pvalue()
}

# Objects created inside a loop are not auto-printed, so print each plot.
for (feature in os_features) {
  print(plot_os_km(feature, os_data))
}

- Time to transformation (TTT)

In [None]:
# Build the time-to-transformation (TTT) endpoint.
# The status column must be created first: trans_time is derived from it, and
# referencing clin1$trans before it exists yields a zero-length vector that
# cannot be assigned as a data frame column.

# create TTT status variable: 1 when DLBCL == 1 or HM_aggressive == 1,
# otherwise 0 (NA when missing values leave the comparison undetermined)
clin1$trans <- ifelse(clin1$DLBCL == 1 | clin1$HM_aggressive == 1, 1, 0)

# create time to transformation variable: Transformation_time_years for cases
# with trans == 1, OS_time for the others
clin1$trans_time <- ifelse(
  clin1$trans == 1,
  clin1$Transformation_time_years,
  clin1$OS_time
)

In [None]:
# Kaplan-Meier curves for time to transformation (TTT): one plot per clinical
# feature, each with a confidence band and the p-value from add_pvalue().

# Columns to stratify by, in plotting order.
ttt_features <- c(
  "Sex1",                # Sex
  "Age_greaterthan_60",  # Age
  "stage34",             # Stage
  "FLManagement",        # Firstline Management
  "hb_lessthan_12",      # Hb
  "ldh",                 # ldh
  "nodal_sites1",        # nodal sites
  "Bsymps",              # B symptoms
  "FLIPIscore"           # FLIPI
)

# Build the TTT Kaplan-Meier plot for one stratifying column.
#   feature: name of the column in `data` to stratify by (character scalar).
#   data:    data frame containing trans_time, trans and the feature column.
# Returns the ggplot object produced by ggsurvfit().
plot_ttt_km <- function(feature, data) {
  km_formula <- as.formula(paste("Surv(trans_time, trans) ~", feature))
  km_fit <- survfit2(formula = km_formula, data = data)
  ggsurvfit(km_fit) +
    labs(
      x = "Years",
      y = "TTT"
    ) +
    add_confidence_interval() +
    add_pvalue()
}

# Objects created inside a loop are not auto-printed, so print each plot.
for (feature in ttt_features) {
  print(plot_ttt_km(feature, clin1))
}

- Progression-free survival (PFS)

In [None]:
# Build the progression-free survival (PFS) endpoint.

# create PFS status variable: 1 when DLBCL == 1, HM_aggressive == 1, or a
# non-missing NHLDeath equals 1; otherwise 0.
# The parentheses spell out the grouping R applies anyway, because `&` binds
# tighter than `|`.
clin1$progressed <- ifelse(
  (clin1$DLBCL == 1 | clin1$HM_aggressive == 1) |
    (!is.na(clin1$NHLDeath) & clin1$NHLDeath == 1),
  1,
  0
)

# create time to progression variable: Transformation_time_years when
# DLBCL == 1 or HM_aggressive == 1, OS_time otherwise
clin1$prog_time <- ifelse(
  clin1$DLBCL == 1 | clin1$HM_aggressive == 1,
  clin1$Transformation_time_years,
  clin1$OS_time
)

In [None]:
# Kaplan-Meier curves for progression-free survival (PFS): one plot per
# clinical feature, each with a confidence band and the p-value from
# add_pvalue().

# Columns to stratify by, in plotting order.
pfs_features <- c(
  "Sex1",                # Sex
  "Age_greaterthan_60",  # Age
  "stage34",             # Stage
  "FLManagement",        # Firstline Management
  "hb_lessthan_12",      # Hb
  "ldh",                 # ldh
  "nodal_sites1",        # nodal sites
  "Bsymps",              # B symptoms
  "FLIPIscore"           # FLIPI
)

# Build the PFS Kaplan-Meier plot for one stratifying column.
#   feature: name of the column in `data` to stratify by (character scalar).
#   data:    data frame containing prog_time, progressed and the feature
#            column.
# Returns the ggplot object produced by ggsurvfit().
plot_pfs_km <- function(feature, data) {
  km_formula <- as.formula(paste("Surv(prog_time, progressed) ~", feature))
  km_fit <- survfit2(formula = km_formula, data = data)
  ggsurvfit(km_fit) +
    labs(
      x = "Years",
      y = "PFS"
    ) +
    add_confidence_interval() +
    add_pvalue()
}

# Objects created inside a loop are not auto-printed, so print each plot.
for (feature in pfs_features) {
  print(plot_pfs_km(feature, clin1))
}