In [2]:
library(glmnet)
library(doMC)
library(survival)
library(data.table)
library(mltools)
library(CoxBoost)
library(randomForestSRC)
library(CoxHD)

In [15]:
# Load the prognosis table: tab-separated file with a header row.
df_all <- read.table("df_prognosis.tsv", sep = "\t", header = TRUE)

### 2094 rows 166 columns

# Drop every row that contains an NA (161 rows).
df_all <- na.omit(df_all)
# Keep only rows with a strictly positive os (removes 2 rows).
df_all <- df_all[df_all$os > 0, ]

####

# Recode new_eln as an ordered factor adverse < intermediate < favorable,
# relabelled "0", "1", "2".
df_all$new_eln <- factor(
    df_all$new_eln,
    levels = c("adverse", "intermediate", "favorable"),
    labels = 0:2,
    ordered = TRUE
)

# One-hot encode predicted_component. The row names are saved first and put
# back afterwards, around the data.table round-trip.
name <- rownames(df_all)
df_all$predicted_component <- as.factor(df_all$predicted_component)
df_final <- as.data.frame(
    one_hot(as.data.table(df_all), cols = "predicted_component")
)
rownames(df_final) <- name

####

### 1931 rows 180 columns

In [95]:
# Penalised Cox regression (glmnet) predictor.
#
# designTrain    : covariate matrix used to fit the model
# designTest     : covariate matrix to predict on
# responseTrain  : survival response handed to cv.glmnet as `y`
# alpha          : elastic-net mixing parameter
#                  (1 = l1 penalty, 0 = l2 penalty, 1/2 = elastic net)
# ninternalfolds : number of folds of the inner cross-validation over lambda
#
# Returns a plain numeric vector with one type = "response" prediction per
# row of designTest, taken at s = "lambda.1se".
predictorGLM <- function(designTrain, designTest, responseTrain, alpha, ninternalfolds=10) {
    # Fixed seed so the inner CV fold assignment is reproducible
    set.seed(1010)

    # Fit: lambda is selected by internal cross-validation
    cvModel <- cv.glmnet(
        designTrain,
        responseTrain,
        family = "cox",
        alpha = alpha,
        nfolds = ninternalfolds,
        grouped = TRUE
    )

    # Predict: first (only) column of the prediction matrix, flattened
    predMatrix <- predict(cvModel, newx = designTest, s = "lambda.1se", type = "response")
    as.vector(predMatrix[, 1])
}

# CoxBoost predictor.
#
# designTrain   : covariate matrix used to fit the model (CoxBoost `x`)
# designTest    : covariate matrix to predict on
# responseTrain : two-column object; column 1 is used as the time,
#                 column 2 as the status
#
# Returns a plain vector holding the type = "lp" prediction for designTest.
predictorBoost<-function(designTrain, designTest, responseTrain){
  set.seed(1010)
  # Train
  cvfit <- CoxBoost(time = responseTrain[, 1],
                    status = responseTrain[, 2],
                    x = designTrain)

  # Predict.
  # No newtime/newstatus here: the held-out outcome is not an input of this
  # function (the previous code referenced an undefined `responseTest`), and
  # a linear-predictor prediction only needs the new covariates.
  risk.predict <- predict(cvfit, newdata = designTest, type = "lp")

  return(as.vector(risk.predict))
}
# Random survival forest (rfsrc) predictor.
#
# designTrain   : covariate matrix used to fit the forest
# designTest    : covariate matrix to predict on
# responseTrain : response whose columns must be named `time` and `status`,
#                 because the model formula refers to them by name
# ntree         : number of trees. Defaults to 1000; the previous default
#                 (`ntree = ntree`) referred to itself and failed as soon as
#                 the argument was omitted.
# importance    : forwarded to both rfsrc() and predict()
#
# Returns the `predicted` component of the forest prediction on designTest.
predictorRF <- function(designTrain, designTest, responseTrain, ntree=1000, importance="none") {
    set.seed(1010)
    # Train
    cvfit <- rfsrc(Surv(time, status) ~ ., data = data.frame(designTrain, responseTrain),
                   ntree = ntree, importance = importance)

    # Predict
    risk.predict <- predict(cvfit, data.frame(designTest), importance = importance)$predicted

    return(risk.predict)
}
# Cox model with stepwise selection (step(), penalty k = 2).
#
# designTrain   : covariate matrix used to fit the model; its column names
#                 define the scope formula handed to step()
# designTest    : covariate matrix to predict on
# responseTrain : response whose columns must be named `time` and `status`
#
# Returns predict() of the selected model on designTest.
predictorAIC <- function(designTrain, designTest, responseTrain) {
    set.seed(1010)

    # Start from the Cox model that uses every covariate
    fullModel <- coxph(Surv(time, status) ~ ., data=data.frame(designTrain,responseTrain))

    # Scope of the search: all training columns joined with "+"
    covariateTerms <- paste(colnames(designTrain), collapse = "+")
    searchScope <- as.formula(paste("Surv(time,status) ~", covariateTerms))

    selectedModel <- step(fullModel, scope = searchScope, k = 2, trace = 0)

    # Predict on the held-out rows
    predict(selectedModel, data.frame(designTest))
}
# CoxRFX predictor.
#
# designTrain   : covariate matrix used to fit the model (converted to a
#                 data.frame before fitting)
# designTest    : covariate matrix to predict on
# responseTrain : two-column object; column 1 is used as the time,
#                 column 2 as the event indicator
# max.iter      : forwarded to CoxRFX
#
# Returns predict() of the fitted model on designTest.
predictorRFX <- function(designTrain, designTest, responseTrain, max.iter = 500) {
    set.seed(1010)

    # Fit
    survTrain <- Surv(time = responseTrain[, 1], event = responseTrain[, 2])
    rfxFit <- CoxRFX(data.frame(designTrain), survTrain, max.iter = max.iter)

    # Remove the Z component of the fit before predicting on new data
    rfxFit$Z <- NULL

    # Predict
    predict(rfxFit, data.frame(designTest))
}

In [96]:

# Run "mypredictor" in a repeated k-fold cross-validation setting.
#
# mypredictor : function(designTrain, designTest, responseTrain, ...) that
#               returns one risk score per row of designTest
# response    : matrix with columns named `time` and `status`
#               (the concordance formula refers to them by name)
# design      : covariate matrix, one row per sample
# nfolds      : number of folds per repeat
# nrepeats    : number of times the data are re-shuffled and re-split
# seed        : random number generator seed used for the fold assignment
# mc.cores    : forwarded to parallel::mclapply
# alpha       : passed to mypredictor as `alpha` when use_alpha is TRUE
# use_alpha   : forward `alpha` to mypredictor
# use_ntree   : forward `ntree` to mypredictor (only when use_alpha is FALSE)
# ntree       : passed to mypredictor as `ntree` when use_ntree is TRUE
# ...         : further arguments forwarded to mypredictor
#
# The defaults used to refer to themselves (e.g. `nfolds = nfolds`), so
# omitting any of them failed; they now hold concrete values.
#
# Returns a numeric vector of test concordances whose length is the number
# of CV experiments (nfolds x nrepeats, e.g. 50).
runCV <- function(mypredictor, response, design, nfolds=5, nrepeats=10, seed=233, mc.cores=1, alpha=1, use_alpha=FALSE, use_ntree=FALSE, ntree=1000, ...) {
    # random number generator seed
    set.seed(seed)

    # Make folds: every repeat shuffles the row indices and deals them into
    # nfolds groups; the groups of all repeats are concatenated.
    n <- nrow(design)
    folds <- list()
    for (i in seq_len(nrepeats)) {
        folds <- c(folds, split(sample(seq_len(n)), rep(seq_len(nfolds), length.out = n)))
    }
    nexp <- length(folds) # the total number of CV experiments

    # Parallel CV
    print("start CV")
    rescv <- parallel::mclapply(seq_len(nexp),
                   FUN=function(iexp) {
                       cat(".")
                       testIdx <- folds[[iexp]]
                       vTrain <- design[-testIdx, , drop = FALSE]
                       vTest <- design[testIdx, , drop = FALSE]
                       lTrain <- response[-testIdx, , drop = FALSE]
                       lTest <- response[testIdx, , drop = FALSE]

                       # Train and predict. The training response is passed
                       # under its full name `responseTrain` rather than via
                       # partial matching of `response`.
                       if (use_alpha) {
                           predict.test <- mypredictor(designTrain=vTrain, designTest=vTest, responseTrain=lTrain, alpha=alpha, ...)
                       } else if (use_ntree) {
                           predict.test <- mypredictor(designTrain=vTrain, designTest=vTest, responseTrain=lTrain, ntree=ntree, ...)
                       } else {
                           predict.test <- mypredictor(designTrain=vTrain, designTest=vTest, responseTrain=lTrain, ...)
                       }

                       # Evaluate the concordance index on the test fold.
                       # NOTE(review): survConcordance() is reported as
                       # deprecated in recent survival releases in favour of
                       # concordance(); kept as is so the numbers do not
                       # change. Confirm before upgrading.
                       ci.test <- suppressWarnings(survConcordance(Surv(time,status) ~ predict.test, as.data.frame(lTest)))
                       return(as.vector(ci.test$concordance))
                   },
                   mc.cores=mc.cores
                   )

    return(unlist(rescv))

}


In [97]:
# Design matrix: the first 177 columns of df_final.
# Response: os / os_status, renamed to time / status.
x <- data.matrix(df_final[, 1:177])
y <- data.matrix(df_final[, c("os", "os_status")])
colnames(y) <- c("time", "status")

# Tuning grids: ten alpha values for the glmnet model, ten forest sizes
l_alpha <- seq(0.1, 1, 0.1)
l_ntree <- seq(100, 1000, 100)

# One entry per model configuration: 10 x glmnet, 10 x random forest,
# then CoxBoost and CoxRFX. str_predictors holds the matching column prefixes.
predictors <- c(rep(list(predictorGLM), 10), rep(list(predictorRF), 10), predictorBoost, predictorRFX)
str_predictors <- c(rep("CoxMod", 10), rep("RFS", 10), "CoxBoost", "RFX")

res.CLIN_DEMO_CYTO <- c()
i <- 0   # index of the current configuration (result column)
j <- 0   # position in l_alpha, advanced only for glmnet configurations
k <- 0   # position in l_ntree, advanced only for forest configurations
for (predictor in predictors) {
    use_alpha <- identical(predictorGLM, predictor)
    use_ntree <- identical(predictorRF, predictor)
    i <- i + 1
    if (use_alpha) {
        j <- j + 1
    }
    if (use_ntree) {
        k <- k + 1
    }
    alpha <- l_alpha[j]
    ntree <- l_ntree[k]

    # 5-fold CV repeated 10 times: 50 concordance values per configuration
    tmp <- runCV(mypredictor = predictor,
                 response = y, design = x,
                 nfolds = 5, nrepeats = 10, seed = 233,
                 use_alpha = use_alpha, alpha = alpha,
                 use_ntree = use_ntree, ntree = ntree,
                 mc.cores = 1)
    res.CLIN_DEMO_CYTO <- cbind(res.CLIN_DEMO_CYTO, tmp)

    # Column name = prefix, "_", then the tuning value (empty when untuned)
    colnames(res.CLIN_DEMO_CYTO)[i] <- paste(
        str_predictors[i],
        if (use_alpha) alpha else if (use_ntree) ntree else "",
        sep = "_"
    )
}

[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..................................................[1] "start CV"
..........

In [98]:
# Show the CV concordance matrix: one row per CV experiment, one column per model configuration
res.CLIN_DEMO_CYTO

CoxMod_0.1,CoxMod_0.2,CoxMod_0.3,CoxMod_0.4,CoxMod_0.5,CoxMod_0.6,CoxMod_0.7,CoxMod_0.8,CoxMod_0.9,CoxMod_1,⋯,RFS_300,RFS_400,RFS_500,RFS_600,RFS_700,RFS_800,RFS_900,RFS_1000,CoxBoost_,RFX_
0.7195823,0.7155953,0.7131115,0.7120004,0.7109219,0.7113304,0.7100559,0.7101539,0.7104154,0.7102846,⋯,0.7351057,0.7356776,0.7347299,0.7351874,0.735645,0.7344031,0.7352038,0.7344848,0.7167064,0.7192229
0.697876,0.6993523,0.7006223,0.7021621,0.701924,0.7024002,0.7015748,0.7017652,0.7017494,0.701797,⋯,0.7119249,0.7120361,0.7125123,0.713306,0.7122742,0.7128933,0.7133219,0.7121631,0.7069245,0.7136235
0.7131601,0.7151945,0.7156465,0.716034,0.7150653,0.7150815,0.7145002,0.7146294,0.714694,0.7147424,⋯,0.7226375,0.7230412,0.7236224,0.7232672,0.7242359,0.724914,0.7241875,0.7246557,0.7141127,0.7149039
0.7059456,0.7103234,0.7117141,0.7119038,0.7123305,0.7128204,0.7132313,0.7131681,0.7136106,0.7139109,⋯,0.7330657,0.7317224,0.7327654,0.733524,0.7321649,0.7321965,0.7307425,0.731185,0.7178936,0.7182729
0.7115249,0.7080682,0.7074711,0.7071412,0.7063084,0.7059628,0.7050515,0.7048001,0.7046115,0.7046901,⋯,0.7299709,0.7322335,0.7319978,0.7313222,0.7320135,0.7318721,0.7296724,0.7318721,0.708131,0.7071883
0.7304974,0.730079,0.7271502,0.7256315,0.7252906,0.7254145,0.7254455,0.7251046,0.7250891,0.7253061,⋯,0.7348055,0.7375484,0.7389121,0.7381838,0.7392996,0.7381063,0.7383078,0.7389121,0.7299241,0.7394855
0.695684,0.6958909,0.6953498,0.6924454,0.6925489,0.6926523,0.6927319,0.6923977,0.6925727,0.6926682,⋯,0.7225158,0.7234706,0.7259055,0.723582,0.7246324,0.7226908,0.7235979,0.7244096,0.6985804,0.7014132
0.7149169,0.7191738,0.7213506,0.7228341,0.7236403,0.7241724,0.7243659,0.7246239,0.7243498,0.7246239,⋯,0.7299934,0.728784,0.7282197,0.7279133,0.7287195,0.7293484,0.7279456,0.7288647,0.7258494,0.7236242
0.7063914,0.7064238,0.7046929,0.7035605,0.7038032,0.7023311,0.7025575,0.7026546,0.7011502,0.7011987,⋯,0.7317728,0.7325008,0.731061,0.7317566,0.7309802,0.7315463,0.7316919,0.7313846,0.7060194,0.7073135
0.7009004,0.7040204,0.7038588,0.7036325,0.7040043,0.7038426,0.7037618,0.7038911,0.7039719,0.7039234,⋯,0.7198467,0.7225303,0.7231122,0.7221585,0.7228051,0.7215927,0.7208652,0.7225626,0.7058472,0.7030667


In [99]:
# Per-column summary (min, quartiles, mean, max) of the CV concordance values
summary(res.CLIN_DEMO_CYTO)

   CoxMod_0.1       CoxMod_0.2       CoxMod_0.3       CoxMod_0.4    
 Min.   :0.6840   Min.   :0.6851   Min.   :0.6842   Min.   :0.6845  
 1st Qu.:0.7015   1st Qu.:0.7041   1st Qu.:0.7041   1st Qu.:0.7036  
 Median :0.7117   Median :0.7108   Median :0.7109   Median :0.7106  
 Mean   :0.7106   Mean   :0.7114   Mean   :0.7113   Mean   :0.7109  
 3rd Qu.:0.7189   3rd Qu.:0.7188   3rd Qu.:0.7190   3rd Qu.:0.7188  
 Max.   :0.7399   Max.   :0.7391   Max.   :0.7390   Max.   :0.7401  
   CoxMod_0.5       CoxMod_0.6       CoxMod_0.7       CoxMod_0.8    
 Min.   :0.6839   Min.   :0.6846   Min.   :0.6851   Min.   :0.6839  
 1st Qu.:0.7039   1st Qu.:0.7026   1st Qu.:0.7027   1st Qu.:0.7028  
 Median :0.7102   Median :0.7100   Median :0.7097   Median :0.7097  
 Mean   :0.7107   Mean   :0.7104   Mean   :0.7102   Mean   :0.7101  
 3rd Qu.:0.7184   3rd Qu.:0.7180   3rd Qu.:0.7178   3rd Qu.:0.7175  
 Max.   :0.7404   Max.   :0.7408   Max.   :0.7406   Max.   :0.7407  
   CoxMod_0.9        CoxMod_1     

In [100]:
colnames(x)