In [19]:
# https://cran.r-project.org/web/packages/penaltyLearning/index.html

In [20]:
# libraries
library(penaltyLearning)
library(data.table)

In [21]:
# Feature-set variant being evaluated: either "previous" or "proposed".
# The value becomes the first component of every prediction file name.
category <- "proposed"

In [22]:
# Number of features to use; the value is also embedded in the prediction file names.
# NOTE(review): the training loop selects 2 * n_features feature columns
# (columns 2 .. 2 * n_features + 1), so this presumably counts feature pairs
# rather than individual columns - confirm.
n_features <- 8

In [23]:
# Every immediate sub-directory of ../../training_data is treated as one dataset;
# only the bare directory names (not full paths) are kept.
datasets <- list.dirs(
    "../../training_data",
    recursive = FALSE,
    full.names = FALSE
)

In [24]:
# Training: for every dataset, fit an unregularized interval regression model on
# each cross-validation split and write the held-out predictions to
# predictions/<category>.<dataset>.<fold>.<n_features>.csv
# Reads the globals `datasets`, `category` and `n_features`.

# make sure the output directory exists so write.csv() does not fail on a fresh checkout
dir.create("predictions", showWarnings = FALSE, recursive = TRUE)

for (dataset in datasets) {
    # read data
    data.dir   <- file.path("../../training_data", dataset)
    feature.dt <- fread(file.path(data.dir, "features_sorted_transformed.csv"))
    # replace NaN by 0 (is.nan() is FALSE for non-numeric columns, so those are left as they are)
    feature.dt[, names(feature.dt) := lapply(.SD, function(x) { x[is.nan(x)] <- 0; x })]
    target.dt  <- fread(file.path(data.dir, "target.csv"))
    folds.dt   <- fread(file.path(data.dir, "folds.csv"))

    # fold labels actually present in the file (not assumed to be exactly 1..n)
    fold.ids <- sort(unique(folds.dt$fold))

    # drop targets whose interval is unbounded on both sides (-Inf, Inf)
    target.dt <- target.dt[!(min.log.lambda == -Inf & max.log.lambda == Inf)]

    # keep only sequences known to all three tables, then put targets and folds
    # into the row order of the feature table so that rows line up by sequenceID
    # instead of relying on the three files sharing the same order
    feature.dt <- feature.dt[sequenceID %in% target.dt$sequenceID &
                             sequenceID %in% folds.dt$sequenceID]
    target.dt  <- target.dt[match(feature.dt$sequenceID, sequenceID)]
    folds.dt   <- folds.dt[match(feature.dt$sequenceID, sequenceID)]

    # feature columns are taken by position: columns 2 .. 2 * n_features + 1
    # NOTE(review): this assumes column 1 is sequenceID - confirm against the CSV layout
    if (ncol(feature.dt) < 2 * n_features + 1) {
        stop("dataset ", dataset, " has only ", ncol(feature.dt),
             " columns, need at least ", 2 * n_features + 1, call. = FALSE)
    }
    inputset <- colnames(feature.dt)[2:(2 * n_features + 1)]

    # convert once per dataset; the per-fold splits below are plain row subsets
    feature.mat <- as.matrix(feature.dt[, ..inputset])
    target.mat  <- as.matrix(target.dt[, c("min.log.lambda", "max.log.lambda")])
    fold        <- folds.dt$fold

    for (test.fold in fold.ids) {
        # get train and test rows (which() drops NA fold labels, like data.table row filters do)
        train.idx <- which(fold != test.fold)
        test.idx  <- which(fold == test.fold)
        feature.mat.train <- feature.mat[train.idx, , drop = FALSE]
        feature.mat.test  <- feature.mat[test.idx, , drop = FALSE]
        target.mat.train  <- target.mat[train.idx, , drop = FALSE]

        # train model; on failure keep the condition object so the reason can be reported
        fit <- tryCatch(
            IntervalRegressionUnregularized(
                feature.mat = feature.mat.train,
                target.mat = target.mat.train
            ),
            error = function(e) e
        )

        # check if model training was successful
        if (inherits(fit, "error")) {
            cat("Error in fold", test.fold, "for dataset", dataset, "\n")
            cat("  reason:", conditionMessage(fit), "\n")
            next
        }

        # get prediction from test set
        target.mat.pred <- fit$predict(feature.mat.test)
        colnames(target.mat.pred) <- "llda"

        # save prediction to csv (columns: sequenceID, llda)
        prediction <- data.frame(sequenceID = feature.dt$sequenceID[test.idx], target.mat.pred)
        write.csv(
            prediction,
            file = file.path("predictions", paste(category, dataset, test.fold, n_features, "csv", sep = ".")),
            row.names = FALSE
        )
    }
}

Error in fold 1 for dataset H3K9me3_TDH_BP 
