In [9]:
# Load the per-barcode metadata table from a tab-separated file whose first
# line holds the column names.
label_corces2016 <- read.delim(
  file = "../input/corces2016_barcode_metadata.tsv",
  header = TRUE
)

In [10]:
# Restore the serialized R object stored in the .rds file.
# NOTE(review): presumably a SnapATAC object, judging by the file name - confirm.
data <- readRDS(file = "../input/corces2016-snap-full.rds")

In [11]:
# Keep only the metadata rows whose barcode appears among the row names of
# `data`.
label_corces2016 <- label_corces2016[
  is.element(label_corces2016$barcode, rownames(data)),
]

In [12]:
# Sort the metadata rows by the values in the first column.
label_corces2016 <- label_corces2016[order(label_corces2016[[1]]), ]

In [13]:
# Write the metadata table to a tab-separated file, without quoting and
# without a row-name column.
write.table(
  x = label_corces2016,
  file = "../input/metadata_corces2016_sorted.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

In [14]:
#' Build stratified 5-fold cross-validation indices and save them to disk.
#'
#' Cell populations with 10 or fewer cells are dropped before the folds are
#' drawn, so every index in `Train_Idx` / `Test_Idx` refers to a position in
#' the filtered label vector (the labels selected by `Cells_to_Keep`), not to
#' a row of the original table.
#'
#' @param LabelsPath Cell population annotations as an in-memory table
#'   (data frame or matrix, one row per cell). Despite the name this is not a
#'   file path: the object is passed straight to `as.matrix()`.
#' @param col_Index Column index (integer) selecting which annotation level
#'   to use when the table holds several (default is 1).
#' @param OutputDir Directory that receives `CV_folds.RData`. It is created
#'   when it does not exist yet.
#' @return Called for its side effect: writes `<OutputDir>/CV_folds.RData`
#'   containing `n_folds`, `Train_Idx`, `Test_Idx`, `col_Index` and
#'   `Cells_to_Keep`. Returns `NULL` invisibly.
Cross_Validation <- function(LabelsPath, col_Index = 1, OutputDir) {
  Labels <- as.matrix(LabelsPath)
  Labels <- as.vector(Labels[, col_Index])

  # Drop every population that does not have more than 10 cells.
  class_sizes <- table(Labels)
  small_classes <- names(class_sizes)[class_sizes <= 10]
  Cells_to_Keep <- !(Labels %in% small_classes)
  Labels <- Labels[Cells_to_Keep]

  if (length(Labels) == 0) {
    stop(
      "No cells left after removing populations with 10 or fewer cells.",
      call. = FALSE
    )
  }

  # Namespace-qualified call: the package is used without being attached to
  # the caller's search path.
  n_folds <- 5
  Folds <- rBayesianOptimization::KFold(
    Labels,
    nfolds = n_folds,
    stratified = TRUE
  )

  # Split i uses fold Test_Folds[i] as the test set (folds are visited in
  # reverse order, n_folds down to 1) and all remaining folds as training set.
  Test_Folds <- rev(seq_len(n_folds))
  Train_Idx <- lapply(Test_Folds, function(k) unlist(Folds[-k]))
  Test_Idx <- lapply(Test_Folds, function(k) Folds[[k]])

  # save() cannot create missing directories, so make sure the target exists.
  if (!dir.exists(OutputDir)) {
    dir.create(OutputDir, recursive = TRUE)
  }

  # The object names below are part of the file's contract: whoever loads
  # CV_folds.RData gets variables with exactly these names.
  save(
    n_folds, Train_Idx, Test_Idx, col_Index, Cells_to_Keep,
    file = file.path(OutputDir, "CV_folds.RData")
  )
}

# Generate the cross-validation folds from the third metadata column and
# store them under ../tmp.
Cross_Validation(
  LabelsPath = label_corces2016,
  col_Index = 3,
  OutputDir = "../tmp"
)

[[1]]
  [1]   2   3  17  21  32  39  45  55  60  62  63  64  66  80  96  97 109 112
 [19] 114 116 117 119 121 122 126 137 142 148 152 153 157 164 167 181 185 187
 [37] 191 196 198 212 216 224 227 228 231 233 240 242 245 250 256 269 270 274
 [55] 278 280 281 293 296 297 298 309 313 316 318 321 327 330 331 346 353 359
 [73] 367 368 375 377 378 387 393 408 419 428 432 433 435 445 449 450 452 454
 [91] 463 471 472 474 475 476 478 484 487 489 492 494 495 498 499 501 503 505
[109] 506 514 519 526 536 539 546 547 571 572  20  31  38  44  50  52  69  71
[127]  73  74  75  77  78  79  81  86  87  92  94  99 102 103 106 115 124 130
[145] 136 144 149 158 162 172 174 180 194 204 210 211 214 241 246 252 263 277
[163] 283 285 287 292 299 300 302 306 307 308 310 311 314 315 322 338 344 354
[181] 357 360 361 363 365 369 373 374 384 386 390 397 400 402 403 406 407 409
[199] 411 413 415 418 420 423 426 429 431 434 436 437 457 458 467 470 488 491
[217] 500 502 504 507 508 510 516 518 523 527 540 542 550 